From 9a21e81abdecac3212fb1117d79be5b0a5770efe Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Wed, 17 Apr 2024 11:53:36 +0800 Subject: [PATCH 01/18] feat: support cbf methods --- omnisafe/adapter/__init__.py | 1 + omnisafe/adapter/barrier_function_adapter.py | 219 ++++++++ .../adapter/beta_barrier_function_adapter.py | 245 +++++++++ .../offpolicy_barrier_function_adapter.py | 151 ++++++ .../robust_barrier_function_adapter.py | 174 ++++++ omnisafe/algorithms/__init__.py | 4 + omnisafe/algorithms/off_policy/__init__.py | 15 +- omnisafe/algorithms/off_policy/ddpg_cbf.py | 93 ++++ omnisafe/algorithms/off_policy/sac_rcbf.py | 175 ++++++ omnisafe/algorithms/on_policy/__init__.py | 3 + .../on_policy/barrier_function/__init__.py | 24 + .../on_policy/barrier_function/ppo_cbf.py | 106 ++++ .../on_policy/barrier_function/trpo_cbf.py | 117 ++++ omnisafe/common/barrier_comp.py | 86 +++ omnisafe/common/barrier_solver.py | 251 +++++++++ omnisafe/common/buffer/onpolicy_buffer.py | 12 +- .../common/buffer/vector_onpolicy_buffer.py | 17 + omnisafe/common/robust_barrier_solver.py | 428 +++++++++++++++ omnisafe/common/robust_gp_model.py | 498 ++++++++++++++++++ omnisafe/common/utils.py | 215 ++++++++ omnisafe/configs/off-policy/DDPGCBF.yaml | 171 ++++++ omnisafe/configs/off-policy/SACRCBF.yaml | 148 ++++++ omnisafe/configs/on-policy/IPO.yaml | 20 +- omnisafe/configs/on-policy/PPOBetaCBF.yaml | 120 +++++ omnisafe/configs/on-policy/TRPO.yaml | 32 ++ omnisafe/configs/on-policy/TRPOCBF.yaml | 139 +++++ omnisafe/envs/__init__.py | 2 + omnisafe/envs/barrier_function_env.py | 209 ++++++++ omnisafe/envs/robust_barrier_function_env.py | 224 ++++++++ omnisafe/envs/unicycle_env.py | 366 +++++++++++++ omnisafe/models/actor/actor_builder.py | 9 + omnisafe/models/actor/beta_learning_actor.py | 144 +++++ omnisafe/typing.py | 2 +- 33 files changed, 4386 insertions(+), 34 deletions(-) create mode 100644 omnisafe/adapter/barrier_function_adapter.py create mode 100644 
omnisafe/adapter/beta_barrier_function_adapter.py create mode 100644 omnisafe/adapter/offpolicy_barrier_function_adapter.py create mode 100644 omnisafe/adapter/robust_barrier_function_adapter.py create mode 100644 omnisafe/algorithms/off_policy/ddpg_cbf.py create mode 100644 omnisafe/algorithms/off_policy/sac_rcbf.py create mode 100644 omnisafe/algorithms/on_policy/barrier_function/__init__.py create mode 100644 omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py create mode 100644 omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py create mode 100644 omnisafe/common/barrier_comp.py create mode 100644 omnisafe/common/barrier_solver.py create mode 100644 omnisafe/common/robust_barrier_solver.py create mode 100644 omnisafe/common/robust_gp_model.py create mode 100644 omnisafe/common/utils.py create mode 100644 omnisafe/configs/off-policy/DDPGCBF.yaml create mode 100644 omnisafe/configs/off-policy/SACRCBF.yaml create mode 100644 omnisafe/configs/on-policy/PPOBetaCBF.yaml create mode 100644 omnisafe/configs/on-policy/TRPOCBF.yaml create mode 100644 omnisafe/envs/barrier_function_env.py create mode 100644 omnisafe/envs/robust_barrier_function_env.py create mode 100644 omnisafe/envs/unicycle_env.py create mode 100644 omnisafe/models/actor/beta_learning_actor.py diff --git a/omnisafe/adapter/__init__.py b/omnisafe/adapter/__init__.py index ba768a7eb..75d4539ba 100644 --- a/omnisafe/adapter/__init__.py +++ b/omnisafe/adapter/__init__.py @@ -22,3 +22,4 @@ from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter from omnisafe.adapter.saute_adapter import SauteAdapter from omnisafe.adapter.simmer_adapter import SimmerAdapter +from omnisafe.adapter.beta_barrier_function_adapter import BetaBarrierFunctionAdapter diff --git a/omnisafe/adapter/barrier_function_adapter.py b/omnisafe/adapter/barrier_function_adapter.py new file mode 100644 index 000000000..47fa9b871 --- /dev/null +++ b/omnisafe/adapter/barrier_function_adapter.py @@ -0,0 +1,219 @@ +# Copyright 
2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""BarrierFunction Adapter for OmniSafe.""" + +from __future__ import annotations + +import torch +from rich.progress import track + +from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter +from omnisafe.common.buffer import VectorOnPolicyBuffer +from omnisafe.common.logger import Logger +from omnisafe.models.actor_critic.constraint_actor_critic import ConstraintActorCritic +from omnisafe.utils.config import Config +from omnisafe.common.barrier_solver import PendulumSolver +from omnisafe.common.barrier_comp import BarrierCompensator + +from omnisafe.envs.wrapper import ( + AutoReset, + CostNormalize, + RewardNormalize, + TimeLimit, + Unsqueeze, +) + +class BarrierFunctionAdapter(OnPolicyAdapter): + """BarrierFunction Adapter for OmniSafe. + + The BarrierFunction Adapter is used to establish the logic of interaction between agents and the + environment based on control barrier functions. Its key feature is the introduction of action + compensators and barrier function solvers. + + Args: + env_id (str): The environment id. + num_envs (int): The number of parallel environments. + seed (int): The random seed. + cfgs (Config): The configuration passed from yaml file. 
+ """ + + def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: + """Initialize an instance of :class:`BarrierFunctionAdapter`.""" + super().__init__(env_id, num_envs, seed, cfgs) + self.solver = None + self.compensator = None + self.first_iter = 1 + + def _wrapper( + self, + obs_normalize: bool = False, + reward_normalize: bool = True, + cost_normalize: bool = True, + ) -> None: + """Wrapper the environment. + + .. warning:: + Since solving the optimization problem requires obtaining physical quantities with practical + significance from state observations, the Barrier Function Adapter does not support + normalization of observations. + + Args: + obs_normalize (bool, optional): Whether to normalize the observation. Defaults to False. + reward_normalize (bool, optional): Whether to normalize the reward. Defaults to True. + cost_normalize (bool, optional): Whether to normalize the cost. Defaults to True. + """ + assert not obs_normalize, 'Barrier function does not support observation normalization!' 
+ if self._env.need_time_limit_wrapper: + self._env = TimeLimit(self._env, time_limit=1000, device=self._device) + self._eval_env = TimeLimit(self._eval_env, time_limit=1000, device=self._device) + if self._env.need_auto_reset_wrapper: + self._env = AutoReset(self._env, device=self._device) + self._eval_env = AutoReset(self._eval_env, device=self._device) + if reward_normalize: + self._env = RewardNormalize(self._env, device=self._device) + if cost_normalize: + self._env = CostNormalize(self._env, device=self._device) + if self._env.num_envs == 1: + self._env = Unsqueeze(self._env, device=self._device) + self._eval_env = Unsqueeze(self._eval_env, device=self._device) + + def set_solver(self, solver: PendulumSolver): + """Set the barrier function solver for Pendulum environment.""" + self.solver: PendulumSolver = solver + + def set_compensator(self, compensator: BarrierCompensator): + """Set the action compensator.""" + self.compensator: BarrierCompensator = compensator + + def reset_gp_model(self): + """Reset the gaussian processing model of barrier function solver.""" + self.solver.GP_model_prev = self.solver.GP_model.copy() + self.solver.build_GP_model() + + def rollout( # pylint: disable=too-many-locals + self, + steps_per_epoch: int, + agent: ConstraintActorCritic, + buffer: VectorOnPolicyBuffer, + logger: Logger, + ) -> None: + """Rollout the environment and store the data in the buffer. + + .. warning:: + As OmniSafe uses :class:`AutoReset` wrapper, the environment will be reset automatically, + so the final observation will be stored in ``info['final_observation']``. + + Args: + steps_per_epoch (int): Number of steps per epoch. + agent (ConstraintActorCritic): Constraint actor-critic, including actor , reward critic + and cost critic. + buffer (VectorOnPolicyBuffer): Vector on-policy buffer. + logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``. 
+ """ + self._reset_log() + if not self.first_iter: + self.reset_gp_model() + + obs, _ = self.reset() + while abs(self._env.unwrapped.state[0]) > 1: + obs, _ = self._env.reset() + path_obs = [] + path_act = [] + for step in track( + range(steps_per_epoch), + description=f'Processing rollout for epoch: {logger.current_epoch}...', + ): + with torch.no_grad(): + value_r = agent.reward_critic(obs)[0] + value_c = agent.cost_critic(obs)[0] + act_dist = agent.actor(obs) + act_mean, act_std = act_dist.mean, agent.actor.std + + approx_compensating_act = self.compensator(obs=obs) + compensated_act_mean_raw = act_mean + approx_compensating_act + + if self.first_iter: + [f, g, x, std] = self.solver.get_GP_dynamics(obs, use_prev_model = False) + else: + [f, g, x, std] = self.solver.get_GP_dynamics(obs, use_prev_model = True) + + compensating_act = self.solver.control_barrier(compensated_act_mean_raw, f, g, x, std) + + compensated_act_mean = compensated_act_mean_raw + compensating_act + final_act = torch.normal(compensated_act_mean, act_std) + + logp = agent.actor.log_prob(final_act).detach() + path_obs.append(obs.detach().cpu().squeeze().numpy()) + path_act.append(final_act.detach().cpu().squeeze().numpy()) + + next_obs, reward, cost, terminated, truncated, info = self.step(final_act) + + self._log_value(reward=reward, cost=cost, info=info) + + if self._cfgs.algo_cfgs.use_cost: + logger.store({'Value/cost': value_c}) + logger.store({'Value/reward': value_r}) + logger.store({'Metrics/angle': cost}) + + buffer.store( + obs=obs, + act=final_act, + reward=reward, + cost=cost, + value_r=value_r, + value_c=value_c, + logp=logp, + approx_compensating_act=approx_compensating_act.detach(), + compensating_act=compensating_act.detach(), + ) + + obs = next_obs + epoch_end = step >= steps_per_epoch + for idx, (done, time_out) in enumerate(zip(terminated, truncated)): + if epoch_end or done or time_out: + last_value_r = torch.zeros(1) + last_value_c = torch.zeros(1) + if not done: + if 
epoch_end: + logger.log( + f'Warning: trajectory cut off when rollout by epoch at {self._ep_len[idx]} steps.', + ) + _, last_value_r, last_value_c, _ = agent.step(obs[idx]) + if time_out: + _, last_value_r, last_value_c, _ = agent.step( + obs[idx], + ) + last_value_r = last_value_r.unsqueeze(0) + last_value_c = last_value_c.unsqueeze(0) + + if done or time_out: + self._log_metrics(logger, idx) + self._reset_log(idx) + + self._ep_ret[idx] = 0.0 + self._ep_cost[idx] = 0.0 + self._ep_len[idx] = 0.0 + + if step < 650: + self.solver.update_GP_dynamics(obs = path_obs, act = path_act) + + path_obs = [] + path_act = [] + obs, _ = self.reset() + while abs(self._env.unwrapped.state[0]) > 1: + obs, _ = self._env.reset() + buffer.finish_path(last_value_r, last_value_c, idx) + self.first_iter = 0 + diff --git a/omnisafe/adapter/beta_barrier_function_adapter.py b/omnisafe/adapter/beta_barrier_function_adapter.py new file mode 100644 index 000000000..f785c3062 --- /dev/null +++ b/omnisafe/adapter/beta_barrier_function_adapter.py @@ -0,0 +1,245 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""BarrierFunction Adapter for OmniSafe.""" + +from __future__ import annotations + +import torch +import numpy as np +from rich.progress import track + +from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter +from omnisafe.common.buffer import VectorOnPolicyBuffer +from omnisafe.common.logger import Logger +from omnisafe.models.actor_critic.constraint_actor_critic import ConstraintActorCritic +from omnisafe.utils.config import Config +from omnisafe.common.barrier_solver import PendulumSolver +from omnisafe.common.barrier_comp import BarrierCompensator + +from omnisafe.envs.wrapper import ( + AutoReset, + CostNormalize, + RewardNormalize, + TimeLimit, + Unsqueeze, +) + + +def cbf(state=None, eta: float = 0.99): + """ + Calculates CBF constraint set at a given state. Default is + the current state. + """ + + state = state + g = 9.8 + m = 1 + l = 1 + tau = 5e-2 + theta_safety_bounds = [-1.0, 1.0] + thetadot_safety_bounds = [-np.inf, np.inf] + torque_bounds = [-15.0, 15.0] + if (eta>1-1e-3) or (eta<1e-5): + raise ValueError("eta should be inside (0, 1)") + c1 = ((3 * g)/(2 * l)) + c2 = (3 /(m * (l ** 2))) + + theta, thetadot = state[0], state[1] + theta_min, theta_max = theta_safety_bounds[0], theta_safety_bounds[1] + thetadot_min, thetadot_max = thetadot_safety_bounds[0], thetadot_safety_bounds[1] + u_min1 = (1/c2) * (((1 / (tau **2)) * (-eta * (theta - theta_min) - tau * thetadot)) - c1 * np.sin(theta) ) + u_max1 = (1/c2) * (((1 / (tau **2)) * ( eta * (theta_max - theta) - tau * thetadot)) - c1 * np.sin(theta) ) + + + u_min2 = (1/c2) * (((1 / (tau)) * (-eta * (thetadot - thetadot_min))) - c1 * np.sin(theta) ) + u_max2 = (1/c2) * (((1 / (tau)) * ( eta * (thetadot_max - thetadot))) - c1 * np.sin(theta) ) + + u_min = max(u_min1, u_min2, torque_bounds[0]) + u_max = min(u_max1, u_max2, torque_bounds[1]) + + u_min=torque_bounds[0] + u_max=torque_bounds[1] + if u_min>u_max: + raise 
ValueError("Infeasible") + else: + return [u_min, u_max] + +def vectorize_f(f): #--vipul :added action_dim + """ + Converts a function f defined on 1D numpy arrays and outputting pairs of + scalars into a vectorized function accepting batches of + torch tensorized arrays and output pairs of torch tensors. + """ + + def vectorized_f_(obs): #--vipul :added action_dim + + obs = obs.cpu().detach().numpy() + + if len(obs.shape) == 1: # check to see if obs is a batch or single obs + batch_size = 1 + lbs, ubs = f(obs) + lbs=np.array(lbs) + ubs=np.array(ubs) + #lbs = -5 + #ubs = 5 + + else: + batch_size = obs.shape[0] + lbs = np.zeros([batch_size, 1]) + ubs = np.zeros([batch_size, 1]) + for i in range(batch_size): + lbs[i], ubs[i] = f(obs[i]) + + lbs = torch.FloatTensor(lbs).reshape(batch_size, 1) + ubs = torch.FloatTensor(ubs).reshape(batch_size, 1) + + return lbs, ubs + + return vectorized_f_ + + +class BetaBarrierFunctionAdapter(OnPolicyAdapter): + """BarrierFunction Adapter for OmniSafe. + + The BarrierFunction Adapter is used to establish the logic of interaction between agents and the + environment based on control barrier functions. Its key feature is the introduction of action + compensators and barrier function solvers. + + Args: + env_id (str): The environment id. + num_envs (int): The number of parallel environments. + seed (int): The random seed. + cfgs (Config): The configuration passed from yaml file. + """ + + def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: + """Initialize an instance of :class:`BarrierFunctionAdapter`.""" + super().__init__(env_id, num_envs, seed, cfgs) + self.solver = None + self.compensator = None + self.first_iter = 1 + self.constraint_fn = vectorize_f(cbf) + + def _wrapper( + self, + obs_normalize: bool = False, + reward_normalize: bool = True, + cost_normalize: bool = True, + ) -> None: + """Wrapper the environment. + + .. 
warning:: + Since solving the optimization problem requires obtaining physical quantities with practical + significance from state observations, the Barrier Function Adapter does not support + normalization of observations. + + Args: + obs_normalize (bool, optional): Whether to normalize the observation. Defaults to False. + reward_normalize (bool, optional): Whether to normalize the reward. Defaults to True. + cost_normalize (bool, optional): Whether to normalize the cost. Defaults to True. + """ + assert not obs_normalize, 'Barrier function does not support observation normalization!' + if reward_normalize: + self._env = RewardNormalize(self._env, device=self._device) + if cost_normalize: + self._env = CostNormalize(self._env, device=self._device) + if self._env.num_envs == 1: + self._env = Unsqueeze(self._env, device=self._device) + self._eval_env = Unsqueeze(self._eval_env, device=self._device) + + def rollout( # pylint: disable=too-many-locals + self, + steps_per_epoch: int, + agent: ConstraintActorCritic, + buffer: VectorOnPolicyBuffer, + logger: Logger, + ) -> None: + """Rollout the environment and store the data in the buffer. + + .. warning:: + As OmniSafe uses :class:`AutoReset` wrapper, the environment will be reset automatically, + so the final observation will be stored in ``info['final_observation']``. + + Args: + steps_per_epoch (int): Number of steps per epoch. + agent (ConstraintActorCritic): Constraint actor-critic, including actor , reward critic + and cost critic. + buffer (VectorOnPolicyBuffer): Vector on-policy buffer. + logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``. 
+ """ + self._reset_log() + obs, _ = self.reset() + while abs(self._env.unwrapped.state[0]) > 1: + obs, _ = self._env.reset() + for step in track( + range(steps_per_epoch), + description=f'Processing rollout for epoch: {logger.current_epoch}...', + ): + with torch.no_grad(): + act, value_r, value_c, logp = agent.step(obs) + lb, ub = self.constraint_fn(obs) + final_act = lb + (ub-lb)*act + + next_obs, reward, cost, terminated, truncated, info = self.step(final_act) + + self._log_value(reward=reward, cost=cost, info=info) + + if self._cfgs.algo_cfgs.use_cost: + logger.store({'Value/cost': value_c}) + logger.store({'Value/reward': value_r}) + logger.store({'Metrics/angle': info.get('original_cost', cost).cpu()}) + + buffer.store( + obs=obs, + act=act, + reward=reward, + cost=cost, + value_r=value_r, + value_c=value_c, + logp=logp, + ) + + obs = next_obs + epoch_end = step >= steps_per_epoch + for idx, (done, time_out) in enumerate(zip(terminated, truncated)): + if epoch_end or done or time_out: + last_value_r = torch.zeros(1) + last_value_c = torch.zeros(1) + if not done: + if epoch_end: + logger.log( + f'Warning: trajectory cut off when rollout by epoch at {self._ep_len[idx]} steps.', + ) + _, last_value_r, last_value_c, _ = agent.step(obs[idx]) + if time_out: + _, last_value_r, last_value_c, _ = agent.step( + obs[idx], + ) + last_value_r = last_value_r.unsqueeze(0) + last_value_c = last_value_c.unsqueeze(0) + + if done or time_out: + self._log_metrics(logger, idx) + self._reset_log(idx) + + self._ep_ret[idx] = 0.0 + self._ep_cost[idx] = 0.0 + self._ep_len[idx] = 0.0 + obs, _ = self.reset() + while abs(self._env.unwrapped.state[0]) > 1: + obs, _ = self._env.reset() + buffer.finish_path(last_value_r, last_value_c, idx) + self.first_iter = 0 + diff --git a/omnisafe/adapter/offpolicy_barrier_function_adapter.py b/omnisafe/adapter/offpolicy_barrier_function_adapter.py new file mode 100644 index 000000000..b05e950cb --- /dev/null +++ 
b/omnisafe/adapter/offpolicy_barrier_function_adapter.py @@ -0,0 +1,151 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""BarrierFunction Adapter for OmniSafe.""" + +from __future__ import annotations + +import torch +import numpy as np + +from omnisafe.adapter.offpolicy_adapter import OffPolicyAdapter +from omnisafe.common.buffer import VectorOffPolicyBuffer +from omnisafe.common.logger import Logger +from omnisafe.utils.config import Config +from omnisafe.common.barrier_solver import PendulumSolver +from omnisafe.common.robust_barrier_solver import CBFQPLayer +from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.models.actor_critic.constraint_actor_q_critic import ConstraintActorQCritic +from omnisafe.common.robust_gp_model import DynamicsModel + +from omnisafe.envs.wrapper import ( + CostNormalize, + RewardNormalize, + Unsqueeze, +) + +class OffPolicyBarrierFunctionAdapter(OffPolicyAdapter): + + def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: + """Initialize an instance of :class:`BarrierFunctionAdapter`.""" + super().__init__(env_id, num_envs, seed, cfgs) + self.solver = None + self.compensator = None + self.first_iter = 1 + self.episode_rollout = {} + self.episode_rollout['obs'] = [] + self.episode_rollout['final_act'] = [] + 
self.episode_rollout['approx_compensating_act'] = [] + self.episode_rollout['compensating_act'] = [] + + def _wrapper( + self, + obs_normalize: bool = False, + reward_normalize: bool = True, + cost_normalize: bool = True, + ) -> None: + assert not obs_normalize, 'Barrier function does not support observation normalization!' + if reward_normalize: + self._env = RewardNormalize(self._env, device=self._device) + if cost_normalize: + self._env = CostNormalize(self._env, device=self._device) + if self._env.num_envs == 1: + self._env = Unsqueeze(self._env, device=self._device) + self._eval_env = Unsqueeze(self._eval_env, device=self._device) + + def set_solver(self, solver: PendulumSolver): + """Set the barrier function solver for Pendulum environment.""" + self.solver: PendulumSolver = solver + + def set_compensator(self, compensator: BarrierCompensator): + """Set the action compensator.""" + self.compensator: BarrierCompensator = compensator + + def reset_gp_model(self): + """Reset the gaussian processing model of barrier function solver.""" + self.solver.GP_model_prev = self.solver.GP_model.copy() + self.solver.build_GP_model() + + def rollout( # pylint: disable=too-many-locals + self, + rollout_step: int, + agent: ConstraintActorQCritic, + buffer: VectorOffPolicyBuffer, + logger: Logger, + use_rand_action: bool, + ) -> None: + for _ in range(rollout_step): + if use_rand_action: + act = torch.normal(torch.zeros(self.action_space.shape), torch.ones(self.action_space.shape)).unsqueeze(0) + else: + act = agent.actor.predict(self._current_obs, deterministic=False) + + final_act = self.get_safe_action(obs=self._current_obs, act=act) + + self.episode_rollout['obs'].append(self._current_obs) + self.episode_rollout['final_act'].append(final_act) + + next_obs, reward, cost, terminated, truncated, info = self.step(final_act) + logger.store({'Metrics/angle': cost}) + + self._log_value(reward=reward, cost=cost, info=info) + + buffer.store( + obs=self._current_obs, + act=act, + 
reward=reward, + cost=cost, + done=torch.logical_and(terminated, torch.logical_xor(terminated, truncated)), + next_obs=next_obs, + ) + + self._current_obs = next_obs + for idx, done in enumerate(torch.logical_or(terminated, truncated)): + if done: + self._log_metrics(logger, idx) + compensator_loss = self.compensator.train( + torch.cat(self.episode_rollout['obs']), + torch.cat(self.episode_rollout['approx_compensating_act']), + torch.cat(self.episode_rollout['compensating_act']), + ) + logger.store({'Value/Loss_compensator': compensator_loss.item()}) + self.solver.update_GP_dynamics(obs=torch.cat(self.episode_rollout['obs']), act=torch.cat(self.episode_rollout['final_act'])) + + self.episode_rollout['obs'] = [] + self.episode_rollout['final_act'] = [] + self.episode_rollout['approx_compensating_act'] = [] + self.episode_rollout['compensating_act'] = [] + + self._reset_log(idx) + self._current_obs, _ = self._env.reset() + self.first_iter = 0 + if not self.first_iter: + self.reset_gp_model() + + @torch.no_grad + def get_safe_action(self, obs, act): + approx_compensating_act = self.compensator(obs=self._current_obs) + compensated_act_mean_raw = act + approx_compensating_act + + if self.first_iter: + [f, g, x, std] = self.solver.get_GP_dynamics(obs, use_prev_model = False) + else: + [f, g, x, std] = self.solver.get_GP_dynamics(obs, use_prev_model = True) + + compensating_act = self.solver.control_barrier(compensated_act_mean_raw, f, g, x, std) + safe_act = compensated_act_mean_raw + compensating_act + + self.episode_rollout['compensating_act'].append(compensating_act) + self.episode_rollout['approx_compensating_act'].append(approx_compensating_act) + return safe_act \ No newline at end of file diff --git a/omnisafe/adapter/robust_barrier_function_adapter.py b/omnisafe/adapter/robust_barrier_function_adapter.py new file mode 100644 index 000000000..f58f1e176 --- /dev/null +++ b/omnisafe/adapter/robust_barrier_function_adapter.py @@ -0,0 +1,174 @@ +# Copyright 2023 
OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""BarrierFunction Adapter for OmniSafe.""" + +from __future__ import annotations + +import torch +import numpy as np + +from omnisafe.adapter.offpolicy_adapter import OffPolicyAdapter +from omnisafe.common.buffer import VectorOffPolicyBuffer +from omnisafe.common.logger import Logger +from omnisafe.utils.config import Config +from omnisafe.common.robust_barrier_solver import CBFQPLayer +from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.models.actor_critic.constraint_actor_q_critic import ConstraintActorQCritic +from omnisafe.typing import OmnisafeSpace +from omnisafe.common.robust_gp_model import DynamicsModel + + +from omnisafe.envs.wrapper import ( + CostNormalize, + RewardNormalize, + Unsqueeze, +) + +class RobustBarrierFunctionAdapter(OffPolicyAdapter): + + def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: + """Initialize an instance of :class:`BarrierFunctionAdapter`.""" + super().__init__(env_id, num_envs, seed, cfgs) + self.solver = None + self.compensator = None + self._current_steps = 0 + self._num_episodes = 0 + + def _wrapper( + self, + obs_normalize: bool = False, + reward_normalize: bool = True, + cost_normalize: bool = True, + ) -> None: + """Wrapper the environment. + + .. 
warning:: + Since solving the optimization problem requires obtaining physical quantities with practical + significance from state observations, the Barrier Function Adapter does not support + normalization of observations. + + Args: + obs_normalize (bool, optional): Whether to normalize the observation. Defaults to False. + reward_normalize (bool, optional): Whether to normalize the reward. Defaults to True. + cost_normalize (bool, optional): Whether to normalize the cost. Defaults to True. + """ + assert not obs_normalize, 'Barrier function does not support observation normalization!' + if reward_normalize: + self._env = RewardNormalize(self._env, device=self._device) + if cost_normalize: + self._env = CostNormalize(self._env, device=self._device) + if self._env.num_envs == 1: + self._env = Unsqueeze(self._env, device=self._device) + self._eval_env = Unsqueeze(self._eval_env, device=self._device) + # self._env = ActionScale(self._env, low=-1.0, high=1.0, device=self._device) + # self._eval_env = ActionScale(self._eval_env, low=-1.0, high=1.0, device=self._device) + + def set_solver(self, solver: CBFQPLayer): + """Set the barrier function solver for Pendulum environment.""" + self.solver: CBFQPLayer = solver + self.solver.env = self._env + + def set_dynamics_model(self, dynamics_model: DynamicsModel): + """Set the dynamics model.""" + self.dynamics_model = dynamics_model + self.dynamics_model.env = self._env + + def rollout( # pylint: disable=too-many-locals + self, + rollout_step: int, + agent: ConstraintActorQCritic, + buffer: VectorOffPolicyBuffer, + logger: Logger, + use_rand_action: bool, + ) -> None: + """Rollout the environment and store the data in the buffer. + + .. warning:: + As OmniSafe uses :class:`AutoReset` wrapper, the environment will be reset automatically, + so the final observation will be stored in ``info['final_observation']``. + + Args: + rollout_step (int): Number of rollout steps. 
+ agent (ConstraintActorCritic): Constraint actor-critic, including actor, reward critic, + and cost critic. + buffer (VectorOnPolicyBuffer): Vector on-policy buffer. + logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``. + use_rand_action (bool): Whether to use random action. + """ + for _ in range(rollout_step): + state = self.dynamics_model.get_state(self._current_obs) # 动态模型将观测转换为状态,状态和观测之间有一个互逆的转换 + self._current_steps += 1 + if use_rand_action: + act = torch.normal(torch.zeros(self.action_space.shape), torch.ones(self.action_space.shape)).unsqueeze(0).to(self._device) + else: + act = agent.step(self._current_obs, deterministic=False) + + final_act = self.get_safe_action(obs=self._current_obs, act=act) + next_obs, reward, cost, terminated, truncated, info = self.step(final_act) + self._log_value(reward=reward, cost=cost, info=info) + + buffer.store( + obs=self._current_obs, + act=final_act, + reward=reward, + cost=cost, + done=torch.logical_and(terminated, torch.logical_xor(terminated, truncated)), + next_obs=next_obs, + ) + + if self._ep_len[0] % 2 == 0 and self._num_episodes < self._cfgs.dynamics_model_cfgs.gp_max_episodes: + next_state = self.dynamics_model.get_state(next_obs) + self.dynamics_model.append_transition(state.cpu().detach().numpy(), final_act.cpu().detach().numpy(), next_state.cpu().detach().numpy(), t_batch=np.array([self._ep_len[0]*self._env.dt])) + + self._current_obs = next_obs + for idx, done in enumerate(torch.logical_or(terminated, truncated)): + if done: + self._log_metrics(logger, idx) + self._reset_log(idx) + self._num_episodes += 1 + self._current_obs, _ = self._env.reset() + + @property + def safe_action_space(self) -> OmnisafeSpace: + if hasattr(self._env, 'safe_action_space'): + return self._env.safe_action_space + else: + return self._env.action_space + + def get_safe_action(self, obs, act, modular=False, cbf_info_batch=None): + """Given a nominal action, returns a minimally-altered safe action to take. 
+ + Parameters + ---------- + obs : torch.tensor + act : torch.tensor + dynamics_model : DynamicsModel + + Returns + ------- + safe_act : torch.tensor + Safe actions to be taken (cbf_action + action). + """ + state_batch = self.dynamics_model.get_state(obs) + mean_pred_batch, sigma_pred_batch = self.dynamics_model.predict_disturbance(state_batch) + safe_act = self.solver.get_safe_action(state_batch, act, mean_pred_batch, sigma_pred_batch, modular=modular, cbf_info_batch=cbf_info_batch) + + return safe_act + + def __getattr__(self, name): + try: + return getattr(self._env, name) + except AttributeError: + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") \ No newline at end of file diff --git a/omnisafe/algorithms/__init__.py b/omnisafe/algorithms/__init__.py index df6832226..f25928ad2 100644 --- a/omnisafe/algorithms/__init__.py +++ b/omnisafe/algorithms/__init__.py @@ -35,6 +35,8 @@ DDPGLag, SACLag, TD3Lag, + SACRCBF, + DDPGCBF, ) # Offline Safe @@ -63,6 +65,8 @@ TRPOLag, TRPOSaute, TRPOSimmerPID, + TRPOCBF, + PPOBetaCBF, ) diff --git a/omnisafe/algorithms/off_policy/__init__.py b/omnisafe/algorithms/off_policy/__init__.py index 80e48e1a0..e87bd82f2 100644 --- a/omnisafe/algorithms/off_policy/__init__.py +++ b/omnisafe/algorithms/off_policy/__init__.py @@ -24,17 +24,8 @@ from omnisafe.algorithms.off_policy.td3 import TD3 from omnisafe.algorithms.off_policy.td3_lag import TD3Lag from omnisafe.algorithms.off_policy.td3_pid import TD3PID +from omnisafe.algorithms.off_policy.sac_rcbf import SACRCBF +from omnisafe.algorithms.off_policy.ddpg_cbf import DDPGCBF -__all__ = [ - 'DDPG', - 'TD3', - 'SAC', - 'DDPGLag', - 'TD3Lag', - 'SACLag', - 'DDPGPID', - 'TD3PID', - 'SACPID', - 'CRABS', -] +__all__ = ['DDPG', 'TD3', 'SAC', 'DDPGLag', 'TD3Lag', 'SACLag', 'DDPGPID', 'TD3PID', 'SACPID', 'SACRCBF', 'DDPGCBF', 'CRABS'] diff --git a/omnisafe/algorithms/off_policy/ddpg_cbf.py b/omnisafe/algorithms/off_policy/ddpg_cbf.py new file mode 100644 index 
@registry.register
# pylint: disable-next=too-many-instance-attributes, too-few-public-methods
class DDPGCBF(DDPG):
    """The DDPG algorithm with a Control Barrier Function safety layer.

    References:
        - Title: End-to-End Safe Reinforcement Learning through Barrier Functions for
            Safety-Critical Continuous Control Tasks
        - Authors: Richard Cheng, Gabor Orosz, Richard M. Murray, Joel W. Burdick.
        - URL: `DDPGCBF <https://arxiv.org/abs/1903.08792>`_
    """

    def _init_env(self) -> None:
        """Initialize the environment adapter, the CBF QP solver, and the compensator."""
        self._env: OffPolicyBarrierFunctionAdapter = OffPolicyBarrierFunctionAdapter(
            self._env_id,
            self._cfgs.train_cfgs.vector_env_nums,
            self._seed,
            self._cfgs,
        )
        solver = PendulumSolver(device=self._cfgs.train_cfgs.device)
        compensator = BarrierCompensator(
            obs_dim=self._env.observation_space.shape[0],
            act_dim=self._env.action_space.shape[0],
            cfgs=self._cfgs.compensator_cfgs,
        )

        self._env.set_compensator(compensator=compensator)
        self._env.set_solver(solver=solver)

        assert (
            self._cfgs.algo_cfgs.steps_per_epoch % self._cfgs.train_cfgs.vector_env_nums == 0
        ), 'The number of steps per epoch is not divisible by the number of environments.'

        assert (
            int(self._cfgs.train_cfgs.total_steps) % self._cfgs.algo_cfgs.steps_per_epoch == 0
        ), 'The total number of steps is not divisible by the number of steps per epoch.'
        self._epochs: int = int(
            self._cfgs.train_cfgs.total_steps // self._cfgs.algo_cfgs.steps_per_epoch,
        )
        self._epoch: int = 0
        self._steps_per_epoch: int = (
            self._cfgs.algo_cfgs.steps_per_epoch // self._cfgs.train_cfgs.vector_env_nums
        )

        self._update_cycle: int = self._cfgs.algo_cfgs.update_cycle
        assert (
            self._steps_per_epoch % self._update_cycle == 0
        ), 'The number of steps per epoch is not divisible by the number of steps per sample.'
        self._samples_per_epoch: int = self._steps_per_epoch // self._update_cycle
        self._update_count: int = 0

    def _init(self) -> None:
        """Extend the replay buffer with fields recording the compensator's actions."""
        super()._init()
        self._buf.add_field(
            name='approx_compensating_act',
            shape=self._env.action_space.shape,
            dtype=torch.float32,
        )
        self._buf.add_field(
            name='compensating_act',
            shape=self._env.action_space.shape,
            dtype=torch.float32,
        )

    def _init_log(self) -> None:
        """Register the DDPGCBF-specific logger keys.

        +------------------------+------------------------------------------+
        | Things to log          | Description                              |
        +========================+==========================================+
        | Metrics/angle          | Pendulum angle (Pendulum-v1 only).       |
        +------------------------+------------------------------------------+
        | Value/Loss_compensator | Loss of the barrier compensator network. |
        +------------------------+------------------------------------------+
        """
        super()._init_log()
        if self._cfgs.env_id == 'Pendulum-v1':
            self._logger.register_key('Metrics/angle', min_and_max=True)
        self._logger.register_key('Value/Loss_compensator')
@registry.register
# pylint: disable-next=too-many-instance-attributes, too-few-public-methods
class SACRCBF(SAC):
    """The Soft Actor-Critic algorithm with Robust Control Barrier Function.

    References:
        - Title: Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning
            with a Stochastic Actor
        - Authors: Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, Sergey Levine.
        - URL: `SAC <https://arxiv.org/abs/1801.01290>`_
    """

    def _init_env(self) -> None:
        """Initialize the robust barrier adapter, the CBF QP layer, and the GP dynamics model."""
        self._env: RobustBarrierFunctionAdapter = RobustBarrierFunctionAdapter(
            self._env_id,
            self._cfgs.train_cfgs.vector_env_nums,
            self._seed,
            self._cfgs,
        )
        solver = CBFQPLayer(
            env=self._env,
            device=self._cfgs.train_cfgs.device,
            gamma_b=self._cfgs.cbf_cfgs.gamma_b,
            k_d=self._cfgs.cbf_cfgs.k_d,
            l_p=self._cfgs.cbf_cfgs.l_p,
        )
        dynamics_model = DynamicsModel(env=self._env)

        self._env.set_dynamics_model(dynamics_model=dynamics_model)
        self._env.set_solver(solver=solver)

        assert (
            self._cfgs.algo_cfgs.steps_per_epoch % self._cfgs.train_cfgs.vector_env_nums == 0
        ), 'The number of steps per epoch is not divisible by the number of environments.'

        assert (
            int(self._cfgs.train_cfgs.total_steps) % self._cfgs.algo_cfgs.steps_per_epoch == 0
        ), 'The total number of steps is not divisible by the number of steps per epoch.'
        self._epochs: int = int(
            self._cfgs.train_cfgs.total_steps // self._cfgs.algo_cfgs.steps_per_epoch,
        )
        self._epoch: int = 0
        self._steps_per_epoch: int = (
            self._cfgs.algo_cfgs.steps_per_epoch // self._cfgs.train_cfgs.vector_env_nums
        )

        self._update_cycle: int = self._cfgs.algo_cfgs.update_cycle
        assert (
            self._steps_per_epoch % self._update_cycle == 0
        ), 'The number of steps per epoch is not divisible by the number of steps per sample.'
        self._samples_per_epoch: int = self._steps_per_epoch // self._update_cycle
        self._update_count: int = 0

    def _init_log(self) -> None:
        """Register the SACRCBF-specific logger keys (pendulum angle, Pendulum-v1 only)."""
        super()._init_log()
        if self._cfgs.env_id == 'Pendulum-v1':
            self._logger.register_key('Metrics/angle', min_and_max=True)

    def _update_actor(
        self,
        obs: torch.Tensor,
    ) -> None:
        """Update the actor, then re-tune the entropy temperature on the *safe* action.

        Unlike plain SAC, the temperature loss uses the log-probability of the
        action after the CBF safety filter, so the entropy target reflects the
        actions actually executed.

        Args:
            obs (torch.Tensor): The ``observation`` sampled from buffer.
        """
        super()._update_actor(obs)

        if self._cfgs.algo_cfgs.auto_alpha:
            with torch.no_grad():
                action = self._actor_critic.actor.predict(obs, deterministic=False)
                action = self._env.get_safe_action(obs, action)
                log_prob = self._actor_critic.actor.log_prob(action)
            alpha_loss = -self._log_alpha * (log_prob + self._target_entropy).mean()

            self._alpha_optimizer.zero_grad()
            alpha_loss.backward()
            self._alpha_optimizer.step()
            self._logger.store(
                {
                    'Loss/alpha_loss': alpha_loss.mean().item(),
                },
            )
        self._logger.store(
            {
                'Value/alpha': self._alpha,
            },
        )

    def _update_reward_critic(
        self,
        obs: torch.Tensor,
        action: torch.Tensor,
        reward: torch.Tensor,
        done: torch.Tensor,
        next_obs: torch.Tensor,
    ) -> None:
        """Update the reward critic using safe next-actions for the Bellman target.

        Args:
            obs (torch.Tensor): The ``observation`` sampled from buffer.
            action (torch.Tensor): The ``action`` sampled from buffer.
            reward (torch.Tensor): The ``reward`` sampled from buffer.
            done (torch.Tensor): The ``terminated`` sampled from buffer.
            next_obs (torch.Tensor): The ``next observation`` sampled from buffer.
        """
        with torch.no_grad():
            next_action = self._actor_critic.actor.predict(next_obs, deterministic=False)
            # Filter the sampled next-action through the CBF layer before bootstrapping.
            next_action = self._env.get_safe_action(next_obs, next_action)
            next_logp = self._actor_critic.actor.log_prob(next_action)
            next_q1_value_r, next_q2_value_r = self._actor_critic.target_reward_critic(
                next_obs,
                next_action,
            )
            next_q_value_r = torch.min(next_q1_value_r, next_q2_value_r) - next_logp * self._alpha
            target_q_value_r = reward + self._cfgs.algo_cfgs.gamma * (1 - done) * next_q_value_r

        q1_value_r, q2_value_r = self._actor_critic.reward_critic(obs, action)
        loss = nn.functional.mse_loss(q1_value_r, target_q_value_r) + nn.functional.mse_loss(
            q2_value_r,
            target_q_value_r,
        )

        if self._cfgs.algo_cfgs.use_critic_norm:
            for param in self._actor_critic.reward_critic.parameters():
                loss += param.pow(2).sum() * self._cfgs.algo_cfgs.critic_norm_coeff

        self._actor_critic.reward_critic_optimizer.zero_grad()
        loss.backward()

        if self._cfgs.algo_cfgs.max_grad_norm:
            clip_grad_norm_(
                self._actor_critic.reward_critic.parameters(),
                self._cfgs.algo_cfgs.max_grad_norm,
            )
        self._actor_critic.reward_critic_optimizer.step()
        self._logger.store(
            {
                'Loss/Loss_reward_critic': loss.mean().item(),
                'Value/reward_critic': q1_value_r.mean().item(),
            },
        )

    def _loss_pi(
        self,
        obs: torch.Tensor,
    ) -> torch.Tensor:
        """Compute the SAC actor loss on the CBF-filtered action.

        Args:
            obs (torch.Tensor): The ``observation`` sampled from buffer.

        Returns:
            The loss of pi/actor.
        """
        action = self._actor_critic.actor.predict(obs, deterministic=False)
        action = self._env.get_safe_action(obs, action)
        log_prob = self._actor_critic.actor.log_prob(action)
        q1_value_r, q2_value_r = self._actor_critic.reward_critic(obs, action)
        return (self._alpha * log_prob - torch.min(q1_value_r, q2_value_r)).mean()
omnisafe.algorithms.on_policy.early_terminated import PPOEarlyTerminated, TRPOEarlyTerminated @@ -36,6 +37,7 @@ from omnisafe.algorithms.on_policy.saute import PPOSaute, TRPOSaute from omnisafe.algorithms.on_policy.second_order import CPO, PCPO from omnisafe.algorithms.on_policy.simmer import PPOSimmerPID, TRPOSimmerPID +from omnisafe.algorithms.on_policy.barrier_function import TRPOCBF, PPOBetaCBF __all__ = [ @@ -49,4 +51,5 @@ *saute.__all__, *second_order.__all__, *simmer.__all__, + *barrier_function.__all__, ] diff --git a/omnisafe/algorithms/on_policy/barrier_function/__init__.py b/omnisafe/algorithms/on_policy/barrier_function/__init__.py new file mode 100644 index 000000000..273ca2831 --- /dev/null +++ b/omnisafe/algorithms/on_policy/barrier_function/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Control Barrier Function Safe Reinforcement Learning algorithms.""" + +from omnisafe.algorithms.on_policy.barrier_function.trpo_cbf import TRPOCBF +from omnisafe.algorithms.on_policy.barrier_function.ppo_cbf import PPOBetaCBF + + +__all__ = [ + 'TRPOCBF', + 'PPOBetaCBF', +] diff --git a/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py b/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py new file mode 100644 index 000000000..e7711ed3c --- /dev/null +++ b/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py @@ -0,0 +1,106 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@registry.register
class PPOBetaCBF(PPO):
    """PPO with a Beta-policy Control Barrier Function safety layer."""

    def _init_env(self) -> None:
        """Initialize the beta barrier-function environment adapter."""
        self._env: BetaBarrierFunctionAdapter = BetaBarrierFunctionAdapter(
            self._env_id,
            self._cfgs.train_cfgs.vector_env_nums,
            self._seed,
            self._cfgs,
        )
        assert (self._cfgs.algo_cfgs.steps_per_epoch) % (
            distributed.world_size() * self._cfgs.train_cfgs.vector_env_nums
        ) == 0, 'The number of steps per epoch is not divisible by the number of environments.'
        self._steps_per_epoch: int = (
            self._cfgs.algo_cfgs.steps_per_epoch
            // distributed.world_size()
            // self._cfgs.train_cfgs.vector_env_nums
        )

    def _init_log(self) -> None:
        """Register the PPOBetaCBF-specific logger keys.

        The class previously defined ``_init_log`` twice and the second definition
        silently overrode the first, so only the effective registration is kept.
        NOTE(review): the overridden duplicate also registered
        ``Value/Loss_compensator`` — confirm whether that key is ever stored before
        re-adding it.
        """
        super()._init_log()
        self._logger.register_key('Metrics/angle', min_and_max=True)

    def _loss_pi(
        self,
        obs: torch.Tensor,
        act: torch.Tensor,
        logp: torch.Tensor,
        adv: torch.Tensor,
    ) -> torch.Tensor:
        r"""Computing pi/actor loss.

        In Proximal Policy Optimization, the loss is defined as:

        .. math::

            L^{CLIP} = \underset{s_t \sim \rho_{\theta}}{\mathbb{E}} \left[
                \min ( r_t A^{R}_{\pi_{\theta}} (s_t, a_t) , \text{clip} (r_t, 1 - \epsilon, 1 + \epsilon)
                A^{R}_{\pi_{\theta}} (s_t, a_t)
            \right]

        where :math:`r_t = \frac{\pi_{\theta}^{'} (a_t|s_t)}{\pi_{\theta} (a_t|s_t)}`,
        :math:`\epsilon` is the clip parameter, and :math:`A^{R}_{\pi_{\theta}} (s_t, a_t)` is the
        advantage.

        Args:
            obs (torch.Tensor): The ``observation`` sampled from buffer.
            act (torch.Tensor): The ``action`` sampled from buffer.
            logp (torch.Tensor): The ``log probability`` of action sampled from buffer.
            adv (torch.Tensor): The ``advantage`` processed. ``reward_advantage`` here.

        Returns:
            The loss of pi/actor.
        """
        distribution = self._actor_critic.actor(obs)
        logp_ = self._actor_critic.actor.log_prob(act)
        ratio = torch.exp(logp_ - logp)
        ratio_cliped = torch.clamp(
            ratio,
            1 - self._cfgs.algo_cfgs.clip,
            1 + self._cfgs.algo_cfgs.clip,
        )
        loss = -torch.min(ratio * adv, ratio_cliped * adv).mean()
        loss -= self._cfgs.algo_cfgs.entropy_coef * distribution.entropy().mean()
        # useful extra info
        entropy = distribution.entropy().mean().item()
        self._logger.store(
            {
                'Train/Entropy': entropy,
                'Train/PolicyRatio': ratio,
                'Loss/Loss_pi': loss.mean().item(),
            },
        )
        return loss
@registry.register
class TRPOCBF(TRPO):
    """TRPO with a Control Barrier Function safety layer and barrier compensator."""

    def _init_log(self) -> None:
        """Register the TRPOCBF-specific logger keys.

        +------------------------+------------------------------------------+
        | Things to log          | Description                              |
        +========================+==========================================+
        | Metrics/angle          | Pendulum angle.                          |
        +------------------------+------------------------------------------+
        | Value/Loss_compensator | Loss of the barrier compensator network. |
        +------------------------+------------------------------------------+
        """
        super()._init_log()
        self._logger.register_key('Metrics/angle', min_and_max=True)
        self._logger.register_key('Value/Loss_compensator')

    def _init_env(self) -> None:
        """Initialize the barrier-function adapter, the CBF solver, and the compensator."""
        self._env: BarrierFunctionAdapter = BarrierFunctionAdapter(
            self._env_id,
            self._cfgs.train_cfgs.vector_env_nums,
            self._seed,
            self._cfgs,
        )
        assert (self._cfgs.algo_cfgs.steps_per_epoch) % (
            distributed.world_size() * self._cfgs.train_cfgs.vector_env_nums
        ) == 0, 'The number of steps per epoch is not divisible by the number of environments.'
        self._steps_per_epoch: int = (
            self._cfgs.algo_cfgs.steps_per_epoch
            // distributed.world_size()
            // self._cfgs.train_cfgs.vector_env_nums
        )
        self.solver = PendulumSolver(device=self._cfgs.train_cfgs.device)
        self.compensator = BarrierCompensator(
            obs_dim=self._env.observation_space.shape[0],
            act_dim=self._env.action_space.shape[0],
            cfgs=self._cfgs.compensator_cfgs,
        )
        self._env.set_solver(solver=self.solver)
        self._env.set_compensator(compensator=self.compensator)

    def _init(self) -> None:
        """Extend the buffer with fields recording the compensator's actions."""
        super()._init()
        self._buf.add_field(
            name='approx_compensating_act',
            shape=self._env.action_space.shape,
            dtype=torch.float32,
        )
        self._buf.add_field(
            name='compensating_act',
            shape=self._env.action_space.shape,
            dtype=torch.float32,
        )

    def _update(self) -> None:
        """Update the actor, the barrier compensator, and the critics.

        The actor is updated once on the whole on-policy batch (TRPO line search),
        then the barrier compensator is trained to predict the compensating actions
        recorded during rollout, and finally the reward (and optionally cost)
        critics are updated for ``update_iters`` epochs over mini-batches.
        """
        data = self._buf.get()

        obs, act, logp, target_value_r, target_value_c, adv_r, adv_c, approx_compensating_act, compensating_act = (
            data['obs'],
            data['act'],
            data['logp'],
            data['target_value_r'],
            data['target_value_c'],
            data['adv_r'],
            data['adv_c'],
            data['approx_compensating_act'],
            data['compensating_act'],
        )

        self._update_actor(obs, act, logp, adv_r, adv_c)
        compensator_loss = self._env.compensator.train(
            observation=obs,
            approx_compensating_act=approx_compensating_act,
            compensating_act=compensating_act,
        )
        dataloader = DataLoader(
            dataset=TensorDataset(obs, target_value_r, target_value_c),
            batch_size=self._cfgs.algo_cfgs.batch_size,
            shuffle=True,
        )

        for _ in range(self._cfgs.algo_cfgs.update_iters):
            for (
                obs,
                target_value_r,
                target_value_c,
            ) in dataloader:
                self._update_reward_critic(obs, target_value_r)
                if self._cfgs.algo_cfgs.use_cost:
                    self._update_cost_critic(obs, target_value_c)

        self._logger.store(
            {
                'Train/StopIter': self._cfgs.algo_cfgs.update_iters,
                'Value/Adv': adv_r.mean().item(),
                'Value/Loss_compensator': compensator_loss.item(),
            },
        )
class BarrierCompensator(torch.nn.Module):
    """A module that represents a barrier compensator using an MLP network.

    The network learns to predict the accumulated compensating action applied by the
    CBF safety layer for a given observation, so that future nominal actions can be
    pre-compensated.

    Attributes:
        model (torch.nn.Module): The MLP network.
        optimizer (torch.optim.Optimizer): The optimizer for training the network.

    Args:
        obs_dim (int): Dimension of the observation space.
        act_dim (int): Dimension of the action space.
        cfgs (Config): Configuration parameters for the network and training.
    """

    def __init__(self, obs_dim: int, act_dim: int, cfgs: Config):
        super().__init__()
        self._cfgs: Config = cfgs
        self.model: torch.nn.Module = build_mlp_network(
            sizes=[obs_dim, *self._cfgs.hidden_sizes, act_dim],
            activation=self._cfgs.activation,
            weight_initialization_mode=self._cfgs.weight_initialization_mode,
        )
        self.optimizer: optim.Adam = optim.Adam(self.parameters(), lr=self._cfgs.lr)

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        """Estimate the sum of previous compensating actions.

        Args:
            obs (torch.Tensor): The input observation.

        Returns:
            torch.Tensor: The estimation of previous compensating actions.
        """
        return self.model(obs)

    # NOTE(review): this method shadows ``torch.nn.Module.train(mode)``, so
    # ``.train()``/``.eval()`` mode switching is unavailable on this module. The
    # name is kept because callers invoke ``compensator.train(observation=...)``.
    def train(self, observation: torch.Tensor, approx_compensating_act: torch.Tensor, compensating_act: torch.Tensor) -> torch.Tensor:
        """Train the barrier compensator model.

        Minimizes the squared error between the model's output and the target,
        which is the sum of the approximate and the actual compensating action.

        Args:
            observation (torch.Tensor): The observation data.
            approx_compensating_act (torch.Tensor): The approximate compensating action.
            compensating_act (torch.Tensor): The actual compensating action.

        Returns:
            torch.Tensor: The loss of the final update iteration.
        """
        # The regression target does not change across iterations; compute it once.
        target = approx_compensating_act + compensating_act
        for _ in range(self._cfgs.update_iters):
            self.optimizer.zero_grad()
            loss = torch.pow(self(observation) - target, 2).mean()
            loss.backward()
            self.optimizer.step()

        return loss
class PendulumSolver:
    """CBF-QP safety filter for the inverted pendulum with GP disturbance models.

    A quadratic program minimally alters the RL action so the pendulum stays inside
    the barrier-certified safe set; per-dimension Gaussian Process regressors learn
    the mismatch between the nominal dynamics and observed transitions.

    Attributes:
        action_size (int): Size of the action space.
        observation_size (int): Size of the observation space.
        torque_bound (float): Maximum torque bound.
        max_speed (float): Maximum speed of the pendulum.
    """

    def __init__(self, action_size: int = 1, observation_size: int = 3,
                 torque_bound: float = 15., max_speed: float = 60.,
                 device: str = 'cpu') -> None:
        """Initializes the PendulumSolver with specified parameters.

        Args:
            action_size (int): Size of the action space.
            observation_size (int): Size of the observation space.
            torque_bound (float): Maximum torque bound.
            max_speed (float): Maximum speed of the pendulum.
            device (str): Device to place returned tensors on.
        """
        self.action_size = action_size
        self.observation_size = observation_size
        self.torque_bound = torque_bound
        self.max_speed = max_speed
        self.F = 1.0  # barrier decay budget on the QP right-hand side
        self._device = device
        self._gamma_b = 0.5  # CBF decay rate
        self._kd = 1.5  # confidence multiplier on the GP standard deviation
        self._build_barrier()
        self.build_GP_model()
        self.GP_model_prev = None

    def build_GP_model(self) -> None:
        """Builds one Gaussian Process regressor per modeled state dimension."""
        gp_list = []
        noise = 0.01
        for _ in range(self.observation_size - 1):
            kern = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
            gp = GaussianProcessRegressor(kernel=kern, alpha=noise, n_restarts_optimizer=10)
            gp_list.append(gp)
        self.GP_model = gp_list

    def _build_barrier(self) -> None:
        """Builds the QP cost matrices and the four barrier half-planes."""
        # The very large second weight penalizes the slack variable, keeping the
        # QP feasible while strongly discouraging constraint violation.
        self.P = matrix(np.diag([1., 1e16]), tc='d')
        self.q = matrix(np.zeros(self.action_size + 1))
        self.h1 = np.array([1, 0.01])
        self.h2 = np.array([1, -0.01])
        self.h3 = np.array([-1, 0.01])
        self.h4 = np.array([-1, -0.01])

    def control_barrier(self, original_action: torch.Tensor, f: np.ndarray, g: np.ndarray, x: np.ndarray, std: np.ndarray) -> torch.Tensor:
        """
        Adjusts the original action using a control barrier function to ensure
        that the action complies with the system's physical constraints.

        Args:
            original_action (torch.Tensor): The original action proposed by the RL algorithm.
            f (np.ndarray): The drift component of the system's dynamics.
            g (np.ndarray): The control component of the system's dynamics.
            x (np.ndarray): The current state of the system.
            std (np.ndarray): The standard deviation of the system's state.

        Returns:
            torch.Tensor: The adjusted action that respects the system's constraints.
        """
        # Use the values configured in __init__ instead of re-hardcoding them here.
        gamma_b = self._gamma_b
        kd = self._kd
        # .cpu() ensures the conversion also works for CUDA tensors.
        u_rl = original_action.detach().cpu().numpy()

        # Set up Quadratic Program to satisfy the Control Barrier Function:
        # rows are the four barrier constraints, the two torque bounds, and the
        # two speed bounds; columns are [u_bar, slack].
        G = np.array(
            [
                [
                    -np.dot(self.h1, g),
                    -np.dot(self.h2, g),
                    -np.dot(self.h3, g),
                    -np.dot(self.h4, g),
                    1,
                    -1,
                    g[1],
                    -g[1]
                ],
                [
                    -1,
                    -1,
                    -1,
                    -1,
                    0,
                    0,
                    0,
                    0
                ]
            ]
        )
        G = np.transpose(G)
        h = np.array(
            [
                gamma_b * self.F + np.dot(self.h1, f) + np.dot(self.h1, g) * u_rl - (1 - gamma_b) * np.dot(self.h1, x) - kd * np.abs(np.dot(self.h1, std)),
                gamma_b * self.F + np.dot(self.h2, f) + np.dot(self.h2, g) * u_rl - (1 - gamma_b) * np.dot(self.h2, x) - kd * np.abs(np.dot(self.h2, std)),
                gamma_b * self.F + np.dot(self.h3, f) + np.dot(self.h3, g) * u_rl - (1 - gamma_b) * np.dot(self.h3, x) - kd * np.abs(np.dot(self.h3, std)),
                gamma_b * self.F + np.dot(self.h4, f) + np.dot(self.h4, g) * u_rl - (1 - gamma_b) * np.dot(self.h4, x) - kd * np.abs(np.dot(self.h4, std)),
                -u_rl + self.torque_bound,
                u_rl + self.torque_bound,
                -f[1] - g[1] * u_rl + self.max_speed,
                f[1] + g[1] * u_rl + self.max_speed
            ]
        )
        h = np.squeeze(h).astype(np.double)

        # Convert numpy arrays to cvx matrices to set up QP
        G = matrix(G, tc='d')
        h = matrix(h, tc='d')
        solvers.options['show_progress'] = False
        sol = solvers.qp(self.P, self.q, G, h)
        u_bar = sol['x']

        # Clamp numerically-violated solutions back onto the torque bounds.
        if np.add(np.squeeze(u_rl), np.squeeze(u_bar[0])) - 0.001 >= self.torque_bound:
            u_bar[0] = self.torque_bound - u_rl
            print("Error in QP")
        elif np.add(np.squeeze(u_rl), np.squeeze(u_bar[0])) + 0.001 <= -self.torque_bound:
            u_bar[0] = -self.torque_bound - u_rl
            print("Error in QP")

        return torch.as_tensor(u_bar[0], dtype=torch.float32, device=self._device).unsqueeze(dim=0)

    def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray:
        """
        Calculates the nominal discrete-time dynamics of the pendulum based on the
        current observation and the original action.

        Args:
            obs (list[float]): The current observation of the system state.
            original_action (float): The original action proposed by the RL algorithm.

        Returns:
            np.ndarray: The calculated dynamics of the system.
        """
        dt = 0.05  # Time step
        G = 10  # Gravitational constant
        # NOTE(review): mass/length here (m=2, l=2) differ from the nominal model
        # in get_GP_dynamics (m=1, l=1) — confirm which parameterization is intended.
        m = 2  # Mass
        l = 2  # Length

        theta = np.arctan2(obs[1], obs[0])  # Calculate the angle
        theta_dot = obs[2]  # Angular velocity

        # Dynamics equations
        f = np.array([-3 * G / (2 * l) * np.sin(theta + np.pi) * dt**2 + theta_dot * dt + theta + 3 / (m * l**2) * original_action * dt**2,
                      theta_dot - 3 * G / (2 * l) * np.sin(theta + np.pi) * dt + 3 / (m * l**2) * original_action * dt])

        return np.squeeze(f)

    def update_GP_dynamics(self, obs: torch.Tensor, act: torch.Tensor) -> None:
        """
        Fits the GP models to the residual between observed transitions and the
        nominal dynamics.

        Args:
            obs (torch.Tensor): Observed states along a trajectory.
            act (torch.Tensor): Actions taken along the trajectory.
        """
        obs = obs.detach().cpu().squeeze().numpy()
        act = act.detach().cpu().squeeze().numpy()
        N = self.observation_size
        X = obs
        U = act
        L = len(X)
        err = np.zeros((L-1, N-1))
        S = np.zeros((L-1, 2))
        for i in range(L-1):
            f = self.get_dynamics(X[i], U[i])
            theta_p = np.arctan2(X[i][1], X[i][0])
            theta_dot_p = X[i][2]
            theta = np.arctan2(X[i+1][1], X[i+1][0])
            theta_dot = X[i+1][2]
            S[i, :] = np.array([theta_p, theta_dot_p])
            err[i, :] = np.array([theta, theta_dot]) - f
        self.GP_model[0].fit(S, err[:, 0])
        self.GP_model[1].fit(S, err[:, 1])

    def get_GP_dynamics(self, obs: torch.Tensor, use_prev_model: bool) -> list[np.ndarray]:
        """
        Retrieves the GP-corrected dynamics for the current observation.

        Args:
            obs (torch.Tensor): Current state observation.
            use_prev_model (bool): If True, query the previously-saved GP models
                (``GP_model_prev``) instead of the current ones.

        Returns:
            list[np.ndarray]: list containing the GP dynamics [f, g, x, std].
        """
        obs = obs.cpu().detach().numpy()
        u_rl = 0
        dt = 0.05
        G = 10
        m = 1
        l = 1
        obs = np.squeeze(obs)
        theta = np.arctan2(obs[1], obs[0])
        theta_dot = obs[2]
        x = np.array([theta, theta_dot])  # [theta, theta_dot] state corresponding to the observation
        f_nom = np.array(
            [
                -3*G/(2*l)*np.sin(theta + np.pi)*dt**2 + theta_dot*dt + theta + 3/(m*l**2)*u_rl*dt**2,
                theta_dot - 3*G/(2*l)*np.sin(theta + np.pi)*dt + 3/(m*l**2)*u_rl*dt
            ]
        )
        g = np.array([3/(m*l**2)*dt**2, 3/(m*l**2)*dt])
        f_nom = np.squeeze(f_nom)
        f = np.zeros(2)
        if use_prev_model:
            [m1, std1] = self.GP_model_prev[0].predict(x.reshape(1, -1), return_std=True)
            [m2, std2] = self.GP_model_prev[1].predict(x.reshape(1, -1), return_std=True)
        else:
            [m1, std1] = self.GP_model[0].predict(x.reshape(1, -1), return_std=True)
            [m2, std2] = self.GP_model[1].predict(x.reshape(1, -1), return_std=True)
        f[0] = f_nom[0] + m1
        f[1] = f_nom[1] + m2
        return [np.squeeze(f), np.squeeze(g), np.squeeze(x), np.array([np.squeeze(std1), np.squeeze(std2)])]
""" self.ptr, self.path_start_idx = 0, 0 - - data = { - 'obs': self.data['obs'], - 'act': self.data['act'], - 'target_value_r': self.data['target_value_r'], - 'adv_r': self.data['adv_r'], - 'logp': self.data['logp'], - 'discounted_ret': self.data['discounted_ret'], - 'adv_c': self.data['adv_c'], - 'target_value_c': self.data['target_value_c'], - } + data = self.data.copy() adv_mean, adv_std, *_ = distributed.dist_statistics_scalar(data['adv_r']) cadv_mean, *_ = distributed.dist_statistics_scalar(data['adv_c']) diff --git a/omnisafe/common/buffer/vector_onpolicy_buffer.py b/omnisafe/common/buffer/vector_onpolicy_buffer.py index a920d8e6a..a8e2c25a8 100644 --- a/omnisafe/common/buffer/vector_onpolicy_buffer.py +++ b/omnisafe/common/buffer/vector_onpolicy_buffer.py @@ -87,6 +87,23 @@ def __init__( # pylint: disable=super-init-not-called,too-many-arguments ) for _ in range(num_envs) ] + + def add_field(self, name: str, shape: tuple[int, ...], dtype: torch.dtype) -> None: + """Add a field to the buffer. + + Examples: + >>> buffer = BaseBuffer(...) + >>> buffer.add_field('new_field', (2, 3), torch.float32) + >>> buffer.data['new_field'].shape + >>> (buffer.size, 2, 3) + + Args: + name (str): The name of the field. + shape (tuple of int): The shape of the field. + dtype (torch.dtype): The dtype of the field. 
+ """ + for buffer in self.buffers: + buffer.add_field(name=name, shape=shape, dtype=dtype) @property def num_buffers(self) -> int: diff --git a/omnisafe/common/robust_barrier_solver.py b/omnisafe/common/robust_barrier_solver.py new file mode 100644 index 000000000..80d8d33b6 --- /dev/null +++ b/omnisafe/common/robust_barrier_solver.py @@ -0,0 +1,428 @@ +import numpy as np +import torch +from cvxopt import matrix +from cvxopt import solvers +from omnisafe.common.utils import to_tensor, prRed, sort_vertices_cclockwise +from qpth.qp import QPFunction + +DYNAMICS_MODE = {'Unicycle': {'n_s': 3, 'n_u': 2}, # state = [x y θ] + 'SimulatedCars': {'n_s': 10, 'n_u': 1}, # state = [x y θ v ω] + 'Pvtol': {'n_s': 6, 'n_u': 2}, # state = [x y θ v_x v_y thrust] + 'Pendulum-v1': {'n_s': 3, 'n_u': 1} + } + + +class CBFQPLayer: + + def __init__(self, env, device='cpu', gamma_b=20, k_d=3.0, l_p=0.03): + """Constructor of CBFLayer. + + Parameters + ---------- + env : gym.env + Gym environment. + gamma_b : float, optional + gamma of control barrier certificate. + k_d : float, optional + confidence parameter desired (2.0 corresponds to ~95% for example). + """ + + self.device = torch.device(device) + + self.env = env + self.u_min, self.u_max = self.get_control_bounds() + self.gamma_b = gamma_b + + self.k_d = k_d + self.l_p = l_p + + self.action_dim = env.action_space.shape[0] + + def get_safe_action(self, state_batch, action_batch, mean_pred_batch, sigma_batch, modular=False, cbf_info_batch=None): # TODO: 迁移的核心在于此,把它用CBF的方法来改写就好 + """ + + Parameters + ---------- + state_batch : torch.tensor or ndarray + action_batch : torch.tensor or ndarray + State batch + mean_pred_batch : torch.tensor or ndarray + Mean of disturbance + sigma_batch : torch.tensor or ndarray + Standard deviation of disturbance + + Returns + ------- + final_action_batch : torch.tensor + Safe actions to take in the environment. 
+ """ + + # batch form if only a single data point is passed + expand_dims = len(state_batch.shape) == 1 + if expand_dims: + action_batch = action_batch.unsqueeze(0) + state_batch = state_batch.unsqueeze(0) + mean_pred_batch = mean_pred_batch.unsqueeze(0) + sigma_batch = sigma_batch.unsqueeze(0) + if cbf_info_batch is not None: + cbf_info_batch = cbf_info_batch.unsqueeze(0) + + if modular: + final_action = torch.clamp(action_batch, self.u_min.repeat(action_batch.shape[0], 1), self.u_max.repeat(action_batch.shape[0], 1)) + else: + Ps, qs, Gs, hs = self.get_cbf_qp_constraints(state_batch, action_batch, mean_pred_batch, sigma_batch, modular=modular, cbf_info_batch=cbf_info_batch) + + Ps, qs, Gs, hs = Ps.detach().cpu().numpy(), qs.detach().cpu().numpy(), Gs.detach().cpu().numpy(), hs.detach().cpu().numpy() + batch_size = Ps.shape[0] + safe_actions = [] + for i in range(batch_size): + Ps_m = matrix(np.diag([1., 1e16]), tc='d') + qs_m = matrix(np.zeros(2)) + Gs_m = matrix(np.float64(Gs[i]), tc='d') + hs_m = matrix(np.float64(hs[i]), tc='d') + solvers.options['show_progress'] = False + sol = solvers.qp(Ps_m, qs_m, Gs_m, hs_m) + safe_action=torch.as_tensor(sol['x'][0], dtype=torch.float32) + safe_actions.append(safe_action) + safe_action_batch = torch.as_tensor(safe_actions, dtype=torch.float32, device=self.device).unsqueeze(-1) + + # print(action_batch.shape, safe_action_batch.shape) + # safe_action_batch = self.solve_qp(Ps, qs, Gs, hs) + final_action = torch.clamp(action_batch + safe_action_batch, self.u_min.repeat(action_batch.shape[0], 1), self.u_max.repeat(action_batch.shape[0], 1)) + + return final_action if not expand_dims else final_action.squeeze(0) + + def solve_qp(self, Ps: torch.Tensor, qs: torch.Tensor, Gs: torch.Tensor, hs: torch.Tensor): + """Solves: + minimize_{u,eps} 0.5 * u^T P u + q^T u + subject to G[u,eps]^T <= h + + Parameters + ---------- + Ps : torch.Tensor + (batch_size, n_u+1, n_u+1) + qs : torch.Tensor + (batch_size, n_u+1) + Gs : torch.Tensor + 
(batch_size, num_ineq_constraints, n_u+1) + hs : torch.Tensor + (batch_size, num_ineq_constraints) + Returns + ------- + safe_action_batch : torch.tensor + The solution of the qp without the last dimension (the slack). + """ + + Ghs = torch.cat((Gs, hs.unsqueeze(2)), -1) + Ghs_norm = torch.max(torch.abs(Ghs), dim=2, keepdim=True)[0] + Gs /= Ghs_norm + hs = hs / Ghs_norm.squeeze(-1) + sol = self.cbf_layer(Ps, qs, Gs, hs, solver_args={"check_Q_spd": False, "maxIter": 100000, "notImprovedLim": 10, "eps": 1e-4}) + safe_action_batch = sol[:, :self.env.action_space.shape[0]] + return safe_action_batch + + def cbf_layer(self, Qs, ps, Gs, hs, As=None, bs=None, solver_args=None): + """ + + Parameters + ---------- + Qs : torch.Tensor + ps : torch.Tensor + Gs : torch.Tensor + shape (batch_size, num_ineq_constraints, num_vars) + hs : torch.Tensor + shape (batch_size, num_ineq_constraints) + As : torch.Tensor, optional + bs : torch.Tensor, optional + solver_args : dict, optional + + Returns + ------- + result : torch.Tensor + Result of QP + """ + + if solver_args is None: + solver_args = {} + + if As is None or bs is None: + As = torch.Tensor().to(self.device).double() + bs = torch.Tensor().to(self.device).double() + + result = QPFunction(verbose=-1, **solver_args)(Qs.double(), ps.double(), Gs.double(), hs.double(), As, bs).float() + if torch.any(torch.isnan(result)): + prRed('QP Failed to solve - result is nan == {}!'.format(torch.any(torch.isnan(result)))) + raise Exception('QP Failed to solve') + return result + + def get_cbf_qp_constraints(self, state_batch, action_batch, mean_pred_batch, sigma_pred_batch, modular=False, cbf_info_batch=None): # TODO: 解耦合的核心在这里 + """Build up matrices required to solve qp + + Program specifically solves: + minimize_{u,eps} 0.5 * u^T P u + q^T u + subject to G[u,eps]^T <= h + + Each control barrier certificate is of the form: + dh/dx^T (f_out + g_out u) >= -gamma^b h_out^3 where out here is an output of the state. 
+ + In the case of SafetyGym_point dynamics: + state = [x y θ v ω] + state_d = [v*cos(θ) v*sin(θ) omega ω u^v u^ω] + + Quick Note on batch matrix multiplication for matrices A and B: + - Batch size should be first dim + - Everything needs to be 3-dimensional + - E.g. if B is a vec, i.e. shape (batch_size, vec_length) --> .view(batch_size, vec_length, 1) + + Parameters + ---------- + state_batch : torch.tensor + current state (check dynamics.py for details on each dynamics' specifics) + action_batch : torch.tensor + Nominal control input. + mean_pred_batch : torch.tensor + mean disturbance prediction state, dimensions (n_s, n_u) + sigma_pred_batch : torch.tensor + standard deviation in additive disturbance after undergoing the output dynamics. + gamma_b : float, optional + CBF parameter for the class-Kappa function + + Returns + ------- + P : torch.tensor + Quadratic cost matrix in qp (minimize_{u,eps} 0.5 * u^T P u + q^T u) + q : torch.tensor + Linear cost vector in qp (minimize_{u,eps} 0.5 * u^T P u + q^T u) + G : torch.tensor + Inequality constraint matrix (G[u,eps] <= h) of size (num_constraints, n_u + 1) + h : torch.tensor + Inequality constraint vector (G[u,eps] <= h) of size (num_constraints,) + """ + + assert len(state_batch.shape) == 2 and len(action_batch.shape) == 2 and len(mean_pred_batch.shape) == 2 and len(sigma_pred_batch.shape) == 2, print(state_batch.shape, action_batch.shape, mean_pred_batch.shape, sigma_pred_batch.shape) + + batch_size = state_batch.shape[0] + gamma_b = self.gamma_b + + # Expand dims + state_batch = torch.unsqueeze(state_batch, -1).to(self.device) + action_batch = torch.unsqueeze(action_batch, -1).to(self.device) + mean_pred_batch = torch.unsqueeze(mean_pred_batch, -1).to(self.device) + sigma_pred_batch = torch.unsqueeze(sigma_pred_batch, -1).to(self.device) + + if self.env.dynamics_mode == 'Pendulum': + num_constraints = 8 + n_u = action_batch.shape[1] # dimension of control inputs + # Inequality constraints (G[u, eps] <= h) + G 
= torch.zeros((batch_size, num_constraints, n_u + 1)).to(self.device) # the extra variable is for epsilon (to make sure qp is always feasible) + h = torch.zeros((batch_size, num_constraints)).to(self.device) + + h1 = torch.FloatTensor([1, 0.01]).unsqueeze(-1).to(self.device) + h2 = torch.FloatTensor([1, -0.01]).unsqueeze(-1).to(self.device) + h3 = torch.FloatTensor([-1, 0.01]).unsqueeze(-1).to(self.device) + h4 = torch.FloatTensor([-1, -0.01]).unsqueeze(-1).to(self.device) + action_batch_scaled=(action_batch*15.0).squeeze(-1).to(self.device) # TODO: 写的好看点 + + theta = state_batch[:,0,:].squeeze(-1) + theta_dot = state_batch[:,1,:].squeeze(-1) + f_norm = torch.zeros(batch_size, 2).to(self.device) + # theta [batch_size, 1] + f_norm[:, 0] = -3*10/2*torch.sin(theta+torch.pi)*self.env.dt + theta + f_norm[: ,1] = theta_dot - 3*10/2*torch.sin(theta+torch.pi) + + g = torch.tensor([3*self.env.dt**2, 3*self.env.dt]).unsqueeze(0).to(self.device) + + f = torch.zeros_like(f_norm).to(self.device) + f[:, 0] = f_norm[:, 0] + mean_pred_batch[:,0,:].squeeze(-1) + f[:, 1] = f_norm[:, 1] + mean_pred_batch[:,1,:].squeeze(-1) + G = torch.tensor( + [ + [ + -torch.matmul(g, h1), + -torch.matmul(g, h2), + -torch.matmul(g, h3), + -torch.matmul(g, h4), + 1, + -1, + g[:, 1], + -g[:, 1] + ], + [ + -1, + -1, + -1, + -1, + 0, + 0, + 0, + 0 + ] + ] + ).transpose(0, 1).repeat(batch_size, 1, 1).to(self.device) + state_batch_squeeze = state_batch.squeeze(-1) + sigma_pred_batch_squeeze = sigma_pred_batch.squeeze(-1) + + h = torch.cat( + [ + self.gamma_b + torch.matmul(f, h1) + torch.matmul(g, h1) * action_batch_scaled - (1 - self.gamma_b) * torch.matmul(state_batch_squeeze, h1) - self.k_d * torch.abs(torch.matmul(sigma_pred_batch_squeeze, h1)), + self.gamma_b + torch.matmul(f, h2) + torch.matmul(g, h2) * action_batch_scaled - (1 - self.gamma_b) * torch.matmul(state_batch_squeeze, h2) - self.k_d * torch.abs(torch.matmul(sigma_pred_batch_squeeze, h2)), + self.gamma_b + torch.matmul(f, h3) + 
torch.matmul(g, h3) * action_batch_scaled - (1 - self.gamma_b) * torch.matmul(state_batch_squeeze, h3) - self.k_d * torch.abs(torch.matmul(sigma_pred_batch_squeeze, h3)), + self.gamma_b + torch.matmul(f, h4) + torch.matmul(g, h4) * action_batch_scaled - (1 - self.gamma_b) * torch.matmul(state_batch_squeeze, h4) - self.k_d * torch.abs(torch.matmul(sigma_pred_batch_squeeze, h4)), + -action_batch_scaled + 15.0, + action_batch_scaled + 15.0, + -f[:, 1].unsqueeze(-1) - g[:, 1] * action_batch_scaled + 60.0, + f[:, 1].unsqueeze(-1) + g[:, 1] * action_batch_scaled + 60.0 + ], + dim=1 + ).to(self.device) + P = torch.diag(torch.tensor([1.e0, 1e16])).repeat(batch_size, 1, 1).to(self.device) + q = torch.zeros((batch_size, self.action_dim + 1)).to(self.device) + + elif self.env.dynamics_mode == 'Unicycle': + + num_cbfs = len(self.env.hazards) + l_p = self.l_p + buffer = 0.1 + + thetas = state_batch[:, 2, :].squeeze(-1) + c_thetas = torch.cos(thetas) + s_thetas = torch.sin(thetas) + + # p(x): lookahead output (batch_size, 2) + ps = torch.zeros((batch_size, 2)).to(self.device) + ps[:, 0] = state_batch[:, 0, :].squeeze(-1) + l_p * c_thetas + ps[:, 1] = state_batch[:, 1, :].squeeze(-1) + l_p * s_thetas + + # p_dot(x) = f_p(x) + g_p(x)u + D_p where f_p(x) = 0, g_p(x) = RL and D_p is the disturbance + + # f_p(x) = [0,...,0]^T + f_ps = torch.zeros((batch_size, 2, 1)).to(self.device) + + # g_p(x) = RL where L = diag([1, l_p]) + Rs = torch.zeros((batch_size, 2, 2)).to(self.device) + Rs[:, 0, 0] = c_thetas + Rs[:, 0, 1] = -s_thetas + Rs[:, 1, 0] = s_thetas + Rs[:, 1, 1] = c_thetas + Ls = torch.zeros((batch_size, 2, 2)).to(self.device) + Ls[:, 0, 0] = 1 + Ls[:, 1, 1] = l_p + g_ps = torch.bmm(Rs, Ls) # (batch_size, 2, 2) + + # D_p(x) = g_p [0 D_θ]^T + [D_x1 D_x2]^T + mu_theta_aug = torch.zeros([batch_size, 2, 1]).to(self.device) + mu_theta_aug[:, 1, :] = mean_pred_batch[:, 2, :] + mu_ps = torch.bmm(g_ps, mu_theta_aug) + mean_pred_batch[:, :2, :] + sigma_theta_aug = torch.zeros([batch_size, 
2, 1]).to(self.device) + sigma_theta_aug[:, 1, :] = sigma_pred_batch[:, 2, :] + sigma_ps = torch.bmm(torch.abs(g_ps), sigma_theta_aug) + sigma_pred_batch[:, :2, :] + + # Build RCBFs + hs = 1e3 * torch.ones((batch_size, num_cbfs), device=self.device) # the RCBF itself + dhdps = torch.zeros((batch_size, num_cbfs, 2), device=self.device) + hazards = self.env.hazards + for i in range(len(hazards)): + if hazards[i]['type'] == 'circle': # 1/2 * (||ps - x_obs||^2 - r^2) + obs_loc = to_tensor(hazards[i]['location'], torch.FloatTensor, self.device) + hs[:, i] = 0.5 * (torch.sum((ps - obs_loc)**2, dim=1) - (hazards[i]['radius'] + buffer)**2) + dhdps[:, i, :] = (ps - obs_loc) + elif hazards[i]['type'] == 'polygon': # max_j(h_j) where h_j = 1/2 * (dist2seg_j)^2 + vertices = sort_vertices_cclockwise(hazards[i]['vertices']) # (n_v, 2) + segments = np.diff(vertices, axis=0, + append=vertices[[0]]) # (n_v, 2) at row i contains vector from v_i to v_i+1 + segments = to_tensor(segments, torch.FloatTensor, self.device) + vertices = to_tensor(vertices, torch.FloatTensor, self.device) + # Get max RBCF TODO: Can be optimized + for j in range(segments.shape[0]): + # Compute Distances to segment + dot_products = torch.matmul(ps - vertices[j:j + 1], segments[j]) / torch.sum( + segments[j] ** 2) # (batch_size,) + mask0_ = dot_products < 0 # if <0 closest point on segment is vertex j + mask1_ = dot_products > 1 # if >0 closest point on segment is vertex j+1 + mask_ = torch.logical_and(dot_products >= 0, + dot_products <= 1) # Else find distance to line l_{v_j, v_j+1} + # Compute Distances + dists2seg = torch.zeros((batch_size)) + if mask0_.sum() > 0: + dists2seg[mask0_] = torch.linalg.norm(ps[mask0_] - vertices[[j]], dim=1) + if mask1_.sum() > 0: + dists2seg[mask1_] = torch.linalg.norm(ps[mask1_] - vertices[[(j + 1) % segments.shape[0]]], dim=1) + if mask_.sum() > 0: + dists2seg[mask_] = torch.linalg.norm( + dot_products[mask_, None] * segments[j].tile((torch.sum(mask_), 1)) + vertices[[j]] - 
+ ps[mask_], dim=1) + # Compute hs_ for this segment + hs_ = 0.5 * ((dists2seg ** 2) + 0.5*buffer) # (batch_size,) + # Compute dhdps TODO: Can be optimized to only compute for indices that need updating + dhdps_ = torch.zeros((batch_size, 2)) + if mask0_.sum() > 0: + dhdps_[mask0_] = ps[mask0_] - vertices[[j]] + if mask1_.sum() > 0: + dhdps_[mask1_] = ps[mask1_] - vertices[[(j + 1) % segments.shape[0]]] + if mask_.sum() > 0: + normal_vec = torch.tensor([segments[j][1], -segments[j][0]]) + normal_vec /= torch.linalg.norm(normal_vec) + dhdps_[mask_] = (ps[mask_]-vertices[j]).matmul(normal_vec) * normal_vec.view((1,2)).repeat(torch.sum(mask_), 1) # dot products (batch_size, 1) + # Find indices to update (closest segment basically, worst case -> CBF boolean and is a min) + idxs_to_update = torch.nonzero(hs[:, i] - hs_ > 0) + # Update the actual hs to be used in the constraints + if idxs_to_update.shape[0] > 0: + hs[idxs_to_update, i] = hs_[idxs_to_update] + # Compute dhdhps for those indices + dhdps[idxs_to_update, i, :] = dhdps_[idxs_to_update, :] + else: + raise Exception('Only obstacles of type `circle` or `polygon` are supported, got: {}'.format(hazards[i]['type'])) + + n_u = action_batch.shape[1] # dimension of control inputs + num_constraints = num_cbfs + 2 * n_u # each cbf is a constraint, and we need to add actuator constraints (n_u of them) + + # Inequality constraints (G[u, eps] <= h) + G = torch.zeros((batch_size, num_constraints, n_u + 1)).to(self.device) # the extra variable is for epsilon (to make sure qp is always feasible) + h = torch.zeros((batch_size, num_constraints)).to(self.device) + ineq_constraint_counter = 0 + + # Add inequality constraints + G[:, :num_cbfs, :n_u] = -torch.bmm(dhdps, g_ps) # h1^Tg(x) + G[:, :num_cbfs, n_u] = -1 # for slack + h[:, :num_cbfs] = gamma_b * (hs ** 3) + (torch.bmm(dhdps, f_ps + mu_ps) - torch.bmm(torch.abs(dhdps), sigma_ps) + torch.bmm(torch.bmm(dhdps, g_ps), action_batch)).squeeze(-1) + ineq_constraint_counter += 
num_cbfs + + # Let's also build the cost matrices, vectors to minimize control effort and penalize slack + P = torch.diag(torch.tensor([1.e0, 1.e-2, 1e5])).repeat(batch_size, 1, 1).to(self.device) + q = torch.zeros((batch_size, n_u + 1)).to(self.device) + + # Add Actuator Constraints + n_u = action_batch.shape[1] # dimension of control inputs + + for c in range(n_u): + + # u_max >= u_nom + u ---> u <= u_max - u_nom + if self.u_max is not None: + G[:, ineq_constraint_counter, c] = 1 + h[:, ineq_constraint_counter] = self.u_max[c] - action_batch[:, c].squeeze(-1) + ineq_constraint_counter += 1 + + # u_min <= u_nom + u ---> -u <= u_min - u_nom + if self.u_min is not None: + G[:, ineq_constraint_counter, c] = -1 + h[:, ineq_constraint_counter] = -self.u_min[c] + action_batch[:, c].squeeze(-1) + ineq_constraint_counter += 1 + + return P, q, G, h + + def get_control_bounds(self): + """ + + Returns + ------- + u_min : torch.tensor + min control input. + u_max : torch.tensor + max control input. + """ + + u_min = torch.tensor(self.env.safe_action_space.low).to(self.device) + u_max = torch.tensor(self.env.safe_action_space.high).to(self.device) + + return u_min, u_max + \ No newline at end of file diff --git a/omnisafe/common/robust_gp_model.py b/omnisafe/common/robust_gp_model.py new file mode 100644 index 000000000..2824faf12 --- /dev/null +++ b/omnisafe/common/robust_gp_model.py @@ -0,0 +1,498 @@ +""" Adapted almost directly from: +https://docs.gpytorch.ai/en/stable/examples/02_Scalable_Exact_GPs/Simple_GP_Regression_CUDA.html + +Training is performed rapidly (and exactly) using GPUs and prediction is done very rapidly using LOVE. 
+""" + +import torch +import numpy as np +import gpytorch +import warnings +warnings.filterwarnings('ignore') +from omnisafe.common.utils import to_tensor, to_numpy + +DYNAMICS_MODE = {'Unicycle': {'n_s': 3, 'n_u': 2}, # state = [x y θ] + 'SimulatedCars': {'n_s': 10, 'n_u': 1}, # state = [x y θ v ω] + 'Pvtol': {'n_s': 6, 'n_u': 2}, # state = [x y θ v_x v_y thrust] + 'Pendulum': {'n_s': 2, 'n_u': 1} + } +MAX_STD = {'Unicycle': [2e-1, 2e-1, 2e-1], 'SimulatedCars': [0, 0.2, 0, 0.2, 0, 0.2, 0, 0.2, 0, 0.2], 'Pvtol': [0, 0, 0, 0, 0, 0], 'Pendulum': [0.1, 0.1, 0.1]} + + +class BaseGPy(gpytorch.models.ExactGP): + + def __init__(self, train_x, train_y, prior_std, likelihood): + super().__init__(train_x, train_y, likelihood) + self.mean_module = gpytorch.means.ZeroMean() + self.covar_module = gpytorch.kernels.ScaleKernel( + gpytorch.kernels.RBFKernel(lengthscale_prior=gpytorch.priors.NormalPrior(1e5, 1e-5)), + outputscale_prior=gpytorch.priors.NormalPrior(prior_std + 1e-6, 1e-5)) + # Initialize lengthscale and outputscale to mean of priors + self.covar_module.base_kernel.lengthscale = 1e5 + self.covar_module.outputscale = prior_std + 1e-6 + + def forward(self, x): + mean = self.mean_module(x) + covar = self.covar_module(x) + return gpytorch.distributions.MultivariateNormal(mean, covar) + +class GPyDisturbanceEstimator: + """ + A wrapper around teh BaseGPy model above. 
+ """ + + def __init__(self, train_x, train_y, prior_std, likelihood=None, device=None): + + if device: + self.device = device + else: + self.device = torch.device("cpu") + + if not torch.is_tensor(train_x): + train_x = to_tensor(train_x, torch.FloatTensor, self.device) + if not torch.is_tensor(train_y): + train_y = to_tensor(train_y, torch.FloatTensor, self.device) + self.train_x = train_x + self.train_y = train_y + + if not likelihood: + likelihood = gpytorch.likelihoods.GaussianLikelihood() + self.likelihood = likelihood.to(self.device) + + self.model = BaseGPy(train_x, train_y, prior_std, likelihood) + self.model = self.model.to(self.device) + + def train(self, training_iter, verbose=False): + + # Find optimal model hyperparameters + self.model.train() + self.likelihood.train() + + # Use the adam optimizer + optimizer = torch.optim.Adam(self.model.parameters(), lr=0.1) # Includes GaussianLikelihood parameters + + # "Loss" for GPs - the marginal log likelihood + mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model) + + for i in range(training_iter): + # Zero gradients from previous iteration + optimizer.zero_grad() + # Output from model + output = self.model(self.train_x) + # Calc loss and backprop gradients + loss = -mll(output, self.train_y) + loss.backward() + if verbose: + print('\tIter %d/%d - Loss: %.3f lengthscale: %.3f noise: %.3f' % ( + i + 1, training_iter, loss.item(), + self.model.covar_module.base_kernel.lengthscale.item(), + self.model.likelihood.noise.item() + )) + optimizer.step() + + def predict(self, test_x): + + # Convert to torch tensor + is_tensor = torch.is_tensor(test_x) + if not is_tensor: + test_x = to_tensor(test_x, torch.FloatTensor, self.device) + + # Get into evaluation (predictive posterior) mode + self.model.eval() + self.likelihood.eval() + + # Test points are regularly spaced along [0,1] + # Make predictions by feeding model through likelihood + with torch.no_grad(), gpytorch.settings.fast_pred_var(): + 
observed_pred = self.likelihood(self.model(test_x)) + pred_dict = dict() + pred_dict['mean'] = observed_pred.mean.cpu() + pred_dict['f_var'] = observed_pred.variance.cpu() + pred_dict['f_covar'] = observed_pred.covariance_matrix.cpu() + lower_ci, upper_ci = observed_pred.confidence_region() + pred_dict['lower_ci'] = lower_ci.cpu() + pred_dict['upper_ci'] = upper_ci.cpu() + + # If they gave us ndarray, we give back ndarray + if not is_tensor: + for key, val in pred_dict.items(): + pred_dict[key] = to_numpy(val) + + return pred_dict + +class DynamicsModel: + + def __init__(self, env, gp_model_size=2000, l_p=0.03, device='cpu'): + """Constructor of DynamicsModel. + + Parameters + ---------- + env : gym.env + Gym environment. + """ + + self.env = env + # Get Dynamics + self.get_f, self.get_g = self.get_dynamics() + self.n_s = DYNAMICS_MODE[self.env.dynamics_mode]['n_s'] + self.n_u = DYNAMICS_MODE[self.env.dynamics_mode]['n_u'] + + # Keep Disturbance History to estimate it using GPs + self.disturb_estimators = None + self.disturbance_history = dict() + self.history_counter = 0 # keeping only max_history_count points in the buffer + self.max_history_count = gp_model_size # How many points we want to have in the GP + self.disturbance_history['state'] = np.zeros((self.max_history_count, self.n_s)) + self.disturbance_history['disturbance'] = np.zeros((self.max_history_count, self.n_s)) + self.train_x = None # x-data used to fit the last GP models + self.train_y = None # y-data used to fit the last GP models + + self.l_p = l_p + + self.device = torch.device(device) + + def predict_next_state(self, state_batch, u_batch, t_batch=None, use_gps=True): + """Given the current state and action, this function predicts the next state. 
+ + Parameters + ---------- + state_batch : ndarray + State + u_batch : ndarray + Action + t_batch: ndarray, optional + Time batch for state dependant dynamics + use_gps : bool, optional + Use GPs to return mean and var + + Returns + ------- + next_state : ndarray + Next state + """ + + expand_dims = len(state_batch.shape) == 1 + if expand_dims: + state_batch = np.expand_dims(state_batch, axis=0) + + # Start with our prior for continuous time system x' = f(x) + g(x)u + if t_batch is not None: + next_state_batch = state_batch + self.env.dt * (self.get_f(state_batch, t_batch) + (self.get_g(state_batch, t_batch) @ np.expand_dims(u_batch, -1)).squeeze(-1)) + else: + next_state_batch = state_batch + self.env.dt * (self.get_f(state_batch) + (self.get_g(state_batch) @ np.expand_dims(u_batch, -1)).squeeze(-1)) + + if use_gps: # if we want estimate the disturbance, let's do it! + pred_mean, pred_std = self.predict_disturbance(state_batch) + next_state_batch += self.env.dt * pred_mean + else: + pred_std = np.zeros(state_batch.shape) + + if expand_dims: + next_state_batch = next_state_batch.squeeze(0) + if pred_std is not None: + pred_std = pred_std.squeeze(0) + + if t_batch is not None: + next_t_batch = t_batch + self.env.dt + return next_state_batch, self.env.dt * pred_std, next_t_batch + + return next_state_batch, self.env.dt * pred_std, t_batch + + def predict_next_obs(self, state, u): + """Predicts the next observation given the state and u. Note that this only predicts the mean next observation. + + Parameters + ---------- + state : ndarray + u : ndarray + + Returns + ------- + next_obs : ndarray + Next observation + """ + + next_state, _, _ = self.predict_next_state(state, u) + next_obs = self.get_obs(next_state) + return next_obs + + def get_dynamics(self): + """Get affine CBFs for a given environment. 
+ + Parameters + ---------- + + Returns + ------- + get_f : callable + Drift dynamics of the continuous system x' = f(x) + g(x)u + get_g : callable + Control dynamics of the continuous system x' = f(x) + g(x)u + """ + + if self.env.dynamics_mode == 'Unicycle': + + def get_f(state_batch, t_batch=None): + f_x = np.zeros(state_batch.shape) + return f_x + + def get_g(state_batch, t_batch=None): + theta = state_batch[:, 2] + g_x = np.zeros((state_batch.shape[0], 3, 2)) + g_x[:, 0, 0] = np.cos(theta) + g_x[:, 1, 0] = np.sin(theta) + g_x[:, 2, 1] = 1.0 + return g_x + + elif self.env.dynamics_mode == 'Pendulum': + + def get_f(state_batch, t_batch=None): + f_x = np.zeros(state_batch.shape) + theta = state_batch[:, 0] + theta_dot = state_batch[:, 1] + f_x = np.array( + [ + -3*10/2*np.sin(theta+np.pi)*self.env.dt + theta, + theta_dot - 3*10/2*np.sin(theta+np.pi) + ] + ) + return f_x + + def get_g(state_batch, t_batch=None): + g_x = np.zeros((state_batch.shape[0], 2, 1)) + g_x[:, 0, 0] = 3*self.env.dt**2 + g_x[:, 1, 0] = 3*self.env.dt + return g_x + + else: + raise Exception('Unknown Dynamics mode.') + + return get_f, get_g + + def get_state(self, obs): + """Given the observation, this function does the pre-processing necessary and returns the state. + + Parameters + ---------- + obs_batch : ndarray or torch.tensor + Environment observation. + + Returns + ------- + state_batch : ndarray or torch.tensor + State of the system. 
+ + """ + + expand_dims = len(obs.shape) == 1 + is_tensor = torch.is_tensor(obs) + + if is_tensor: + dtype = obs.dtype + device = obs.device + obs = to_numpy(obs) + + if expand_dims: + obs = np.expand_dims(obs, 0) + + if self.env.dynamics_mode == 'Unicycle': + theta = np.arctan2(obs[:, 3], obs[:, 2]) + state_batch = np.zeros((obs.shape[0], 3)) + state_batch[:, 0] = obs[:, 0] + state_batch[:, 1] = obs[:, 1] + state_batch[:, 2] = theta + elif self.env.dynamics_mode == 'Pendulum': + theta = np.arctan2(obs[:, 1], obs[:, 0]) + theta_dot = obs[:, 2] + state_batch = np.zeros((obs.shape[0], 2)) + state_batch[:, 0] = theta + state_batch[:, 1] = theta_dot + else: + raise Exception('Unknown dynamics') + + if expand_dims: + state_batch = state_batch.squeeze(0) + + return to_tensor(state_batch, dtype, device) if is_tensor else state_batch + + def get_obs(self, state_batch): + """Given the state, this function returns it to an observation akin to the one obtained by calling env.step + + Parameters + ---------- + state : ndarray + Environment state batch of shape (batch_size, n_s) + + Returns + ------- + obs : ndarray + Observation batch of shape (batch_size, n_o) + + """ + + if self.env.dynamics_mode == 'Unicycle': + obs = np.zeros((state_batch.shape[0], 4)) + obs[:, 0] = state_batch[:, 0] + obs[:, 1] = state_batch[:, 1] + obs[:, 2] = np.cos(state_batch[:, 2]) + obs[:, 3] = np.sin(state_batch[:, 2]) + else: + raise Exception('Unknown dynamics') + return obs + + def append_transition(self, state_batch, u_batch, next_state_batch, t_batch=None): + """Estimates the disturbance from the current dynamics transition and adds it to buffer. 
+ + Parameters + ---------- + state_batch : ndarray + shape (n_s,) or (batch_size, n_s) + u_batch : ndarray + shape (n_u,) or (batch_size, n_u) + next_state_batch : ndarray + shape (n_s,) or (batch_size, n_s) + t_batch : ndarray, optional + shape (1,) or (batch_size, 1) + + Returns + ------- + + """ + + expand_dims = len(state_batch.shape) == 1 + + if expand_dims: + state_batch = np.expand_dims(state_batch, 0) + next_state_batch = np.expand_dims(next_state_batch, 0) + u_batch = np.expand_dims(u_batch, 0) + + u_batch = np.expand_dims(u_batch, -1) # for broadcasting batch matrix multiplication + disturbance_batch = (next_state_batch - state_batch - self.env.dt * (self.get_f(state_batch, t_batch) + (self.get_g(state_batch, t_batch) @ u_batch).squeeze(-1))) / self.env.dt + + # Append new data point (state, disturbance) to our dataset + for i in range(state_batch.shape[0]): + + self.disturbance_history['state'][self.history_counter % self.max_history_count] = state_batch[i] + self.disturbance_history['disturbance'][self.history_counter % self.max_history_count] = disturbance_batch[i] + + # Increment how many data points we have + self.history_counter += 1 + + # Update GP models every max_history_count data points + if self.history_counter % (self.max_history_count/10) == 0: + self.fit_gp_model() + + def fit_gp_model(self, training_iter=70): + """ + + Parameters + ---------- + training_iter : int + Number of training iterations for GP model. 
+ + Returns + ------- + + """ + + if self.history_counter < self.max_history_count: # didn't fill the buffer yet + train_x = self.disturbance_history['state'][:self.history_counter] + train_y = self.disturbance_history['disturbance'][:self.history_counter] + else: # buffer filled, use all the data points + train_x = self.disturbance_history['state'] + train_y = self.disturbance_history['disturbance'] + + # Normalize Data + train_x_std = np.std(train_x, axis=0) + train_x_normalized = train_x / (train_x_std + 1e-8) + train_y_std = np.std(train_y, axis=0) + train_y_normalized = train_y / (train_y_std + 1e-8) + + self.disturb_estimators = [] + for i in range(self.n_s): + # self.disturb_estimators.append(GPyDisturbanceEstimator(train_x, train_y[:, i])) + self.disturb_estimators.append(GPyDisturbanceEstimator(train_x_normalized, train_y_normalized[:, i], MAX_STD[self.env.dynamics_mode][i], device=self.device)) + self.disturb_estimators[i].train(training_iter) + + # track the data I last used to fit the GPs for saving purposes (need it to initialize before loading weights) + self.train_x = train_x + self.train_y = train_y + + def predict_disturbance(self, test_x): + """Predict the disturbance at the queried states using the GP models. 
+ + Parameters + ---------- + test_x : ndarray or torch.tensor + shape(n_test, n_s) + Returns + ------- + means: ndarray or torch.tensor + Prediction means -- shape(n_test, n_s) + vars: ndarray or torch.tensor + Prediction variances -- shape(n_test, n_s) + """ + + is_tensor = torch.is_tensor(test_x) + + if is_tensor: + dtype = test_x.dtype + device = test_x.device + test_x = to_numpy(test_x) + + expand_dims = len(test_x.shape) == 1 + if expand_dims: + test_x = np.expand_dims(test_x, axis=0) + + means = np.zeros(test_x.shape) + f_std = np.zeros(test_x.shape) # standard deviation + + if self.disturb_estimators: + # Normalize + train_x_std = np.std(self.train_x, axis=0) + train_y_std = np.std(self.train_y, axis=0) + test_x = test_x / train_x_std + for i in range(self.n_s): + prediction_ = self.disturb_estimators[i].predict(test_x) + means[:, i] = prediction_['mean'] * (train_y_std[i] + 1e-8) + f_std[:, i] = np.sqrt(prediction_['f_var']) * (train_y_std[i] + 1e-8) + + else: # zero-mean, max_sigma prior + f_std = np.ones(test_x.shape) + for i in range(self.n_s): + f_std[:, i] *= MAX_STD[self.env.dynamics_mode][i] + + if expand_dims: + means = means.squeeze(0) + f_std = f_std.squeeze(0) + + return (to_tensor(means, dtype, device), to_tensor(f_std, dtype, device)) if is_tensor else (means, f_std) + + def load_disturbance_models(self, output): + + if output is None: + return + + self.disturb_estimators = [] + + weights = torch.load('{}/gp_models.pkl'.format(output), map_location=self.device) + self.train_x = torch.load('{}/gp_models_train_x.pkl'.format(output)) + self.train_y = torch.load('{}/gp_models_train_y.pkl'.format(output)) + for i in range(self.n_s): + self.disturb_estimators.append(GPyDisturbanceEstimator(self.train_x, self.train_y[:, i], MAX_STD[self.env.dynamics_mode][i], device=self.device)) + self.disturb_estimators[i].model.load_state_dict(weights[i]) + + def save_disturbance_models(self, output): + + if not self.disturb_estimators or self.train_x is None or 
self.train_y is None: + return + weights = [] + for i in range(len(self.disturb_estimators)): + weights.append(self.disturb_estimators[i].model.state_dict()) + torch.save(weights, '{}/gp_models.pkl'.format(output)) + # Also save data used to fit model (needed for initializing the model before loading weights) + torch.save(self.train_x, '{}/gp_models_train_x.pkl'.format(output)) + torch.save(self.train_y, '{}/gp_models_train_y.pkl'.format(output)) + + def seed(self, seed): + torch.manual_seed(seed) \ No newline at end of file diff --git a/omnisafe/common/utils.py b/omnisafe/common/utils.py new file mode 100644 index 000000000..beee622e5 --- /dev/null +++ b/omnisafe/common/utils.py @@ -0,0 +1,215 @@ +import math +import numpy as np +import os +import torch +from torch.autograd import Variable + +USE_CUDA = torch.cuda.is_available() + + +def prRed(prt): print("\033[91m {}\033[00m".format(prt)) + + +def prGreen(prt): print("\033[92m {}\033[00m".format(prt)) + + +def prYellow(prt): print("\033[93m {}\033[00m".format(prt)) + + +def prLightPurple(prt): print("\033[94m {}\033[00m".format(prt)) + + +def prPurple(prt): print("\033[95m {}\033[00m".format(prt)) + + +def prCyan(prt): print("\033[96m {}\033[00m".format(prt)) + + +def prLightGray(prt): print("\033[97m {}\033[00m".format(prt)) + + +def prBlack(prt): print("\033[98m {}\033[00m".format(prt)) + + +def mat_to_euler_2d(rot_mat): + """ + rot_mat has shape: + [[c -s 0], + [s c 0], + [0 0 1]] + """ + + theta = np.arcsin(rot_mat[1, 0]) + return theta + + +def euler_to_mat_2d(theta_batch): + s = np.sin(theta_batch) + c = np.cos(theta_batch) + Rs = np.zeros((theta_batch.shape[0], 2, 2)) + Rs[:, 0, 0] = c + Rs[:, 0, 1] = -s + Rs[:, 1, 0] = s + Rs[:, 1, 1] = c + return Rs + +def to_numpy(x): + # convert torch tensor to numpy array + return x.cpu().detach().double().numpy() + +def to_tensor(x, dtype, device, requires_grad=False): + # convert numpy array to torch tensor + if type(x).__module__ != 'numpy': + return x + return 
torch.from_numpy(x).type(dtype).to(device).requires_grad_(requires_grad) + +def scale_action(action, action_lb, action_ub, device=None): + + act_k = (action_ub - action_lb) / 2. + act_b = (action_ub + action_lb) / 2. + return act_k * action + act_b + + +def soft_update(target, source, tau): + for target_param, param in zip(target.parameters(), source.parameters()): + target_param.data.copy_( + target_param.data * (1.0 - tau) + param.data * tau + ) + + +def hard_update(target, source): + for target_param, param in zip(target.parameters(), source.parameters()): + target_param.data.copy_(param.data) + + +def create_log_gaussian(mean, log_std, t): + quadratic = -((0.5 * (t - mean) / (log_std.exp())).pow(2)) + l = mean.shape + log_z = log_std + z = l[-1] * math.log(2 * math.pi) + log_p = quadratic.sum(dim=-1) - log_z.sum(dim=-1) - 0.5 * z + return log_p + + +def logsumexp(inputs, dim=None, keepdim=False): + if dim is None: + inputs = inputs.view(-1) + dim = 0 + s, _ = torch.max(inputs, dim=dim, keepdim=True) + outputs = s + (inputs - s).exp().sum(dim=dim, keepdim=True).log() + if not keepdim: + outputs = outputs.squeeze(dim) + return outputs + + +def get_output_folder(parent_dir, env_name): + """Return save folder. + + Assumes folders in the parent_dir have suffix -run{run + number}. Finds the highest run number and sets the output folder + to that number + 1. This is just convenient so that if you run the + same script multiple times tensorboard can plot all of the results + on the same plots with different names. + + Parameters + ---------- + parent_dir: str + Path of the directory containing all experiment runs. + + Returns + ------- + parent_dir/run_dir + Path to this run's save directory. 
+ """ + os.makedirs(parent_dir, exist_ok=True) + experiment_id = 0 + for folder_name in os.listdir(parent_dir): + if not os.path.isdir(os.path.join(parent_dir, folder_name)): + continue + try: + folder_name = int(folder_name.split('-run')[-1]) + if folder_name > experiment_id: + experiment_id = folder_name + except: + pass + experiment_id += 1 + + parent_dir = os.path.join(parent_dir, env_name) + parent_dir = parent_dir + '-run{}'.format(experiment_id) + os.makedirs(parent_dir, exist_ok=True) + return parent_dir + + +def get_wrapped_policy(agent, cbf_wrapper, dynamics_model, compensator=None, warmup=False, action_space=None, + policy_eval=False): + + def wrapped_policy(observation): + + if warmup and action_space: + action = action_space.sample() # Sample random action + else: + action, _ = agent.select_action(observation, evaluate=policy_eval) # Sample action from policy + + if compensator: + action_comp = compensator(observation) + else: + action_comp = 0 + state = dynamics_model.get_state(observation) + disturb_mean, disturb_std = dynamics_model.predict_disturbance(state) + action_safe = cbf_wrapper.get_safe_action(state, action + action_comp, disturb_mean, disturb_std) + # print('state = {}, action = {}, action_comp = {}, u_safe = {}'.format(state, action, action_comp, u_safe)) + return action + action_comp + action_safe + + return wrapped_policy + +def sort_vertices_cclockwise(vertices): + """ Function used to sort vertices of 2D convex polygon in counter clockwise direction. + + Parameters + ---------- + vertices : numpy.ndarray + Array of size (n_v, 2) where n_v is the number of vertices and d is the dimension of the space + + Returns + ------- + sorted_vertices : numpy.ndarray + Array of size (n_v, 2) of the vertices sorted in counter-clockwise direction. 
+ """ + + assert vertices.shape[1] == 2, "Vertices must each have dimension 2, got {}".format(vertices.shape[1]) + + # Sort vertices + polygon_center = vertices.sum(axis=0, keepdims=True) / vertices.shape[0] # (1, d) + rel_vecs = vertices - polygon_center + thetas = np.arctan2(rel_vecs[:, 1], rel_vecs[:, 0]) + idxs = np.argsort(thetas) + return vertices[idxs, :] + +def get_polygon_normals(vertices): + """ + + Parameters + ---------- + vertices : numpy.ndarray + Array of size (n_v, 2) where n_v is the number of 2D vertices. + Returns + ------- + normals : numpy.ndarray + Array of size (n_v, 2) where each row i is the 2D normal vector of the line from vertices_sorted[i] - vertices_sorted[i+1] + + centers : numpy.ndarary + Array of size (n_v, 2) where each row i is the 2D center point of the segment from vertices_sorted[i] to vertices_sorted[i+1] + """ + + sorted_vertices = sort_vertices_cclockwise(vertices) # (n_v, 2) + diffs = np.diff(sorted_vertices, axis=0, append=sorted_vertices[[0]]) # (n_v, 2) at row i contains vector from v_i to v_i+1 + + # Compute Normals (rotate each diff by -90 degrees) + diffs = np.diff(sorted_vertices, axis=0, append=sorted_vertices[[0]]) # (n_v, 2) at row i contains vector from v_i to v_i+1 + normals = np.array([diffs[:, 1], -diffs[:, 0]]).transpose() + normals = normals / np.linalg.norm(normals) + # Compute Centers + centers = (diffs + 2*vertices) / 2.0 + return normals, centers + + diff --git a/omnisafe/configs/off-policy/DDPGCBF.yaml b/omnisafe/configs/off-policy/DDPGCBF.yaml new file mode 100644 index 000000000..1579aa658 --- /dev/null +++ b/omnisafe/configs/off-policy/DDPGCBF.yaml @@ -0,0 +1,171 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +defaults: + # seed for random number generator + seed: 0 + # training configurations + train_cfgs: + # device to use for training, options: cpu, cuda, cuda:0, cuda:0,1, etc. + device: cpu + # number of threads for torch + torch_threads: 16 + # number of vectorized environments + vector_env_nums: 1 + # number of parallel agent, similar to a3c + parallel: 1 + # total number of steps to train + total_steps: 80_000 + # number of evaluate episodes + eval_episodes: 0 + # algorithm configurations + algo_cfgs: + # number of steps to update the policy + steps_per_epoch: 2000 + # number of steps per sample + update_cycle: 1 + # number of iterations to update the policy + update_iters: 1 + # The size of replay buffer + size: 1000000 + # The size of batch + batch_size: 256 + # normalize reward + reward_normalize: False + # normalize cost + cost_normalize: False + # normalize observation + obs_normalize: False + # max gradient norm + max_grad_norm: 40 + # use critic norm + use_critic_norm: False + # critic norm coefficient + critic_norm_coeff: 0.001 + # The soft update coefficient + polyak: 0.001 + # The discount factor of GAE + gamma: 0.99 + # Actor perdorm random action before `start_learning_steps` steps + start_learning_steps: 0 + # The delay step of policy update + policy_delay: 1 + # Whether to use the exploration noise + use_exploration_noise: True + # The exploration noise + exploration_noise: 0.1 + # use cost + use_cost: False + # logger configurations + logger_cfgs: + # use 
wandb for logging + use_wandb: False + # wandb project name + wandb_project: omnisafe + # use tensorboard for logging + use_tensorboard: True + # save model frequency + save_model_freq: 100 + # save logger path + log_dir: "./runs" + # save model path + window_lens: 10 + # model configurations + model_cfgs: + # weight initialization mode + weight_initialization_mode: "kaiming_uniform" + # actor type + actor_type: mlp + # linear learning rate decay + linear_lr_decay: False + # Configuration of Actor network + actor: + # Size of hidden layers + hidden_sizes: [400, 300] + # Activation function + activation: relu + # The learning rate of Actor network + lr: 0.0001 + # Configuration of Critic network + critic: + # The number of critic networks + num_critics: 1 + # Size of hidden layers + hidden_sizes: [400, 300] + # Activation function + + activation: relu + # The learning rate of Critic network + lr: 0.001 + # barrier function compensator configurations + compensator_cfgs: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: relu + # weight initialization mode + weight_initialization_mode: "kaiming_uniform" + # learning rate + lr: 0.01 + # number of iterations to update the compensator + update_iters: 1 + +SafetyCarCircle1-v0: + # model configurations + model_cfgs: + # Configuration of Actor network + actor: + # The learning rate of Actor network + lr: 0.000005 + # Configuration of Critic network + critic: + # The learning rate of Critic network + lr: 0.001 + +SafetyCarGoal1-v0: + # model configurations + model_cfgs: + # Configuration of Actor network + actor: + # The learning rate of Actor network + lr: 0.000005 + # Configuration of Critic network + critic: + # The learning rate of Critic network + lr: 0.001 + +SafetyPointCircle1-v0: + # model configurations + model_cfgs: + # Configuration of Actor network + actor: + # The learning rate of Actor network + lr: 0.000005 + # Configuration of Critic network + critic: + # The learning rate 
of Critic network + lr: 0.001 + +SafetyPointGoal1-v0: + # model configurations + model_cfgs: + # Configuration of Actor network + actor: + # The learning rate of Actor network + lr: 0.000005 + # Configuration of Critic network + critic: + # The learning rate of Critic network + lr: 0.001 diff --git a/omnisafe/configs/off-policy/SACRCBF.yaml b/omnisafe/configs/off-policy/SACRCBF.yaml new file mode 100644 index 000000000..bb133e56c --- /dev/null +++ b/omnisafe/configs/off-policy/SACRCBF.yaml @@ -0,0 +1,148 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +defaults: + # seed for random number generator + seed: 0 + # training configurations + train_cfgs: + # device to use for training, options: cpu, cuda, cuda:0, cuda:0,1, etc. 
+ device: cpu + # number of threads for torch + torch_threads: 4 + # number of vectorized environments + vector_env_nums: 1 + # number of parallel agent, similar to a3c + parallel: 1 + # total number of steps to train + total_steps: 80_000 + # number of evaluate episodes + eval_episodes: 0 + # algorithm configurations + algo_cfgs: + # number of steps to update the policy + steps_per_epoch: 200 + # number of steps per sample + update_cycle: 1 + # number of iterations to update the policy + update_iters: 1 + # The size of replay buffer + size: 1000000 + # The size of batch + batch_size: 256 + # normalize reward + reward_normalize: False + # normalize cost + cost_normalize: False + # normalize observation + obs_normalize: False + # max gradient norm + max_grad_norm: 40 + # use critic norm + use_critic_norm: False + # critic norm coefficient + critic_norm_coeff: 0.001 + # The soft update coefficient + polyak: 0.005 + # The discount factor of GAE + gamma: 0.99 + # Actor perdorm random action before `start_learning_steps` steps + start_learning_steps: 5000 + # The delay step of policy update + policy_delay: 1 + # Whether to use the exploration noise + use_exploration_noise: False + # The exploration noise + exploration_noise: 0.1 + # The policy noise + policy_noise: 0.2 + # policy_noise_clip + policy_noise_clip: 0.5 + # The value of alpha + alpha: 0.2 + # Whether to use auto alpha + auto_alpha: True + # use cost + use_cost: False + # control barrier function configurations + cbf_cfgs: + # gamma of control barrier certificate. 
+ gamma_b: 20 + # confidence parameter desired + k_d: 3.0 + # environment dynamics coefficient + l_p: 0.03 + # logger configurations + logger_cfgs: + # use wandb for logging + use_wandb: False + # wandb project name + wandb_project: omnisafe + # use tensorboard for logging + use_tensorboard: True + # save model frequency + save_model_freq: 100 + # save logger path + log_dir: "./runs" + # save model path + window_lens: 10 + # model configurations + model_cfgs: + # weight initialization mode + weight_initialization_mode: "kaiming_uniform" + # actor type + actor_type: gaussian_sac + # linear learning rate decay + linear_lr_decay: False + # Configuration of Actor network + actor: + # Size of hidden layers + hidden_sizes: [400, 300] + # Activation function + activation: relu + # The learning rate of Actor network + lr: 0.0003 + # Configuration of Critic network + critic: + # The number of critic networks + num_critics: 2 + # Size of hidden layers + hidden_sizes: [400, 300] + # Activation function + activation: relu + # The learning rate of Critic network + lr: 0.0003 + # Dynamics model configurations + dynamics_model_cfgs: + # The max number of episodes updateing GP models + gp_max_episodes: 100 + # The size of gp model + gp_model_size: 2000 + # Whether to use the action compensator + use_compensator: False + +Pendulum-v1: + # algorithm configurations + algo_cfgs: + # Actor perdorm random action before `start_learning_steps` steps + start_learning_steps: 0 + # control barrier function configurations + cbf_cfgs: + # gamma of control barrier certificate. 
+ gamma_b: 0.5 + # confidence parameter desired + k_d: 1.5 + # environment dynamics coefficient + l_p: 0.03 \ No newline at end of file diff --git a/omnisafe/configs/on-policy/IPO.yaml b/omnisafe/configs/on-policy/IPO.yaml index 852b08344..e2a6869c3 100644 --- a/omnisafe/configs/on-policy/IPO.yaml +++ b/omnisafe/configs/on-policy/IPO.yaml @@ -27,25 +27,25 @@ defaults: # number of parallel agent, similar to a3c parallel: 1 # total number of steps to train - total_steps: 10000000 + total_steps: 80_000 # algorithm configurations algo_cfgs: # number of steps to update the policy - steps_per_epoch: 20000 + steps_per_epoch: 2000 # number of iterations to update the policy update_iters: 10 # batch size for each iteration - batch_size: 64 + batch_size: 256 # target kl divergence - target_kl: 0.02 + target_kl: 0.005 # entropy coefficient entropy_coef: 0.0 # normalize reward - reward_normalize: True + reward_normalize: False # normalize cost - cost_normalize: True + cost_normalize: False # normalize observation - obs_normalize: True + obs_normalize: False # early stop when kl divergence is bigger than target kl kl_early_stop: True # use max gradient norm @@ -57,11 +57,11 @@ defaults: # critic norm coefficient critic_norm_coef: 0.001 # reward discount factor - gamma: 0.99 + gamma: 0.995 # cost discount factor cost_gamma: 0.99 # lambda for gae - lam: 0.95 + lam: 0.98 # lambda for cost gae lam_c: 0.95 # clip ratio @@ -127,7 +127,7 @@ defaults: # lagrangian configurations lagrange_cfgs: # Tolerance of constraint violation - cost_limit: 25.0 + cost_limit: 1000.0 # Initial value of lagrangian multiplier lagrangian_multiplier_init: 0.001 # Learning rate of lagrangian multiplier diff --git a/omnisafe/configs/on-policy/PPOBetaCBF.yaml b/omnisafe/configs/on-policy/PPOBetaCBF.yaml new file mode 100644 index 000000000..4bd5f0f12 --- /dev/null +++ b/omnisafe/configs/on-policy/PPOBetaCBF.yaml @@ -0,0 +1,120 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +defaults: + # seed for random number generator + seed: 0 + # training configurations + train_cfgs: + # device to use for training, options: cpu, cuda, cuda:0, cuda:0,1, etc. + device: cpu + # number of threads for torch + torch_threads: 16 + # number of vectorized environments + vector_env_nums: 1 + # number of parallel agent, similar to a3c + parallel: 1 + # total number of steps to train + total_steps: 80_000 + # algorithm configurations + algo_cfgs: + # number of steps to update the policy + steps_per_epoch: 2000 + # number of iterations to update the policy + update_iters: 10 + # batch size for each iteration + batch_size: 64 + # target kl divergence + target_kl: 0.02 + # entropy coefficient + entropy_coef: 0.0 + # normalize reward + reward_normalize: False + # normalize cost + cost_normalize: False + # normalize observation + obs_normalize: False + # early stop when kl divergence is bigger than target kl + kl_early_stop: True + # use max gradient norm + use_max_grad_norm: False + # max gradient norm + max_grad_norm: 40.0 + # use critic norm + use_critic_norm: True + # critic norm coefficient + critic_norm_coef: 0.001 + # reward discount factor + gamma: 0.995 + # cost discount factor + cost_gamma: 0.99 + # lambda for gae + lam: 0.98 + # lambda for cost gae + lam_c: 0.95 + # clip ratio + clip: 0.2 + # advantage estimation method, 
options: gae, retrace + adv_estimation_method: gae + # standardize reward advantage + standardized_rew_adv: True + # standardize cost advantage + standardized_cost_adv: True + # penalty coefficient + penalty_coef: 0.0 + # use cost + use_cost: False + # logger configurations + logger_cfgs: + # use wandb for logging + use_wandb: False + # wandb project name + wandb_project: omnisafe + # use tensorboard for logging + use_tensorboard: True + # save model frequency + save_model_freq: 100 + # save logger path + log_dir: "./runs" + # save model path + window_lens: 100 + # model configurations + model_cfgs: + # weight initialization mode + weight_initialization_mode: "kaiming_uniform" + # actor type, options: gaussian, gaussian_learning + actor_type: beta + # linear learning rate decay + linear_lr_decay: True + # exploration noise anneal + exploration_noise_anneal: False + # std upper bound, and lower bound + std_range: [0.5, 0.1] + # actor network configurations + actor: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: tanh + # out_activation: tanh + # learning rate + lr: 0.0003 + critic: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: tanh + # learning rate + lr: 0.0003 diff --git a/omnisafe/configs/on-policy/TRPO.yaml b/omnisafe/configs/on-policy/TRPO.yaml index 455ba163f..a8d60878b 100644 --- a/omnisafe/configs/on-policy/TRPO.yaml +++ b/omnisafe/configs/on-policy/TRPO.yaml @@ -124,3 +124,35 @@ defaults: activation: tanh # learning rate lr: 0.001 + +Pendulum-v1: + # training configurations + train_cfgs: + # total number of steps to train + total_steps: 80_000 + # algorithm configurations + algo_cfgs: + # number of steps to update the policy + steps_per_epoch: 2000 + # batch size for each iteration + batch_size: 256 + # target kl divergence + target_kl: 0.005 + # normalize observation + obs_normalize: False + # reward discount factor + gamma: 0.995 + # lambda for gae + lam: 0.98 + # model 
configurations + model_cfgs: + # actor network configurations + actor: + # activation function + activation: relu + # barrier function compensator configurations + compensator_cfgs: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: relu \ No newline at end of file diff --git a/omnisafe/configs/on-policy/TRPOCBF.yaml b/omnisafe/configs/on-policy/TRPOCBF.yaml new file mode 100644 index 000000000..74922c9d2 --- /dev/null +++ b/omnisafe/configs/on-policy/TRPOCBF.yaml @@ -0,0 +1,139 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +defaults: + # seed for random number generator + seed: 0 + # training configurations + train_cfgs: + # device to use for training, options: cpu, cuda, cuda:0, cuda:0,1, etc. 
+ device: cpu + # number of threads for torch + torch_threads: 16 + # number of vectorized environments + vector_env_nums: 1 + # number of parallel agent, similar to a3c + parallel: 1 + # total number of steps to train + total_steps: 80_000 + # algorithm configurations + algo_cfgs: + # number of steps to update the policy + steps_per_epoch: 2000 + # number of iterations to update the policy + update_iters: 10 + # batch size for each iteration + batch_size: 256 + # target kl divergence + target_kl: 0.005 + # entropy coefficient + entropy_coef: 0.0 + # normalize reward + reward_normalize: False + # normalize cost + cost_normalize: False + # normalize observation + obs_normalize: False + # early stop when kl divergence is bigger than target kl + kl_early_stop: False + # use max gradient norm + use_max_grad_norm: True + # max gradient norm + max_grad_norm: 40.0 + # use critic norm + use_critic_norm: True + # critic norm coefficient + critic_norm_coef: 0.001 + # reward discount factor + gamma: 0.995 + # cost discount factor + cost_gamma: 0.99 + # lambda for gae + lam: 0.98 + # lambda for cost gae + lam_c: 0.95 + # advantage estimation method, options: gae, retrace + adv_estimation_method: gae + # standardize reward advantage + standardized_rew_adv: True + # standardize cost advantage + standardized_cost_adv: True + # penalty coefficient + penalty_coef: 0.0 + # use cost + use_cost: False + # Damping value for conjugate gradient + cg_damping: 0.1 + # Number of conjugate gradient iterations + cg_iters: 15 + # Subsampled observation + fvp_obs: None + # The sub-sampling rate of the observation + fvp_sample_freq: 1 + # logger configurations + logger_cfgs: + # use wandb for logging + use_wandb: False + # wandb project name + wandb_project: omnisafe + # use tensorboard for logging + use_tensorboard: True + # save model frequency + save_model_freq: 100 + # save logger path + log_dir: "./runs" + # save model path + window_lens: 100 + # model configurations + model_cfgs: + # 
weight initialization mode + weight_initialization_mode: "kaiming_uniform" + # actor type, options: gaussian, gaussian_learning + actor_type: gaussian_learning + # linear learning rate decay + linear_lr_decay: False + # exploration noise anneal + exploration_noise_anneal: False + # std upper bound, and lower bound + std_range: [0.5, 0.1] + # actor network configurations + actor: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: relu + # out_activation: tanh + # learning rate + lr: ~ + # critic network configurations + critic: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: tanh + # learning rate + lr: 0.001 + # barrier function compensator configurations + compensator_cfgs: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: relu + # weight initialization mode + weight_initialization_mode: "kaiming_uniform" + # learning rate + lr: 0.01 + # number of iterations to update the compensator + update_iters: 1 \ No newline at end of file diff --git a/omnisafe/envs/__init__.py b/omnisafe/envs/__init__.py index 4d225c61d..ebeb6af4e 100644 --- a/omnisafe/envs/__init__.py +++ b/omnisafe/envs/__init__.py @@ -19,7 +19,9 @@ from omnisafe.envs.crabs_env import CRABSEnv from omnisafe.envs.custom_env import CustomEnv from omnisafe.envs.meta_drive_env import SafetyMetaDriveEnv +from omnisafe.envs.barrier_function_env import BarrierFunctionEnv from omnisafe.envs.mujoco_env import MujocoEnv from omnisafe.envs.safety_gymnasium_env import SafetyGymnasiumEnv from omnisafe.envs.safety_gymnasium_modelbased import SafetyGymnasiumModelBased from omnisafe.envs.safety_isaac_gym_env import SafetyIsaacGymEnv +from omnisafe.envs.robust_barrier_function_env import RobustBarrierFunctionEnv diff --git a/omnisafe/envs/barrier_function_env.py b/omnisafe/envs/barrier_function_env.py new file mode 100644 index 000000000..f8d0d964c --- /dev/null +++ b/omnisafe/envs/barrier_function_env.py @@ -0,0 
+1,209 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Interface of control barrier function-based environments.""" + +from __future__ import annotations + +from typing import Any, ClassVar + +import gymnasium +import numpy as np +import torch + +from gymnasium import spaces +from omnisafe.envs.core import CMDP, env_register +from omnisafe.typing import Box + + +# @env_register +class BarrierFunctionEnv(CMDP): + """Interface of control barrier function-based environments. + + .. warning:: + Since environments based on control barrier functions require special judgment and control of environmental dynamics, + they do not support the use of vectorized environments for parallelization. + + Attributes: + need_auto_reset_wrapper (bool): Whether to use auto reset wrapper. + need_time_limit_wrapper (bool): Whether to use time limit wrapper. + """ + need_auto_reset_wrapper = True + need_time_limit_wrapper = False + _support_envs: ClassVar[list[str]] = [ + 'Pendulum-v1', + ] + + def __init__( + self, + env_id: str, + num_envs: int = 1, + device: str = 'cpu', + **kwargs: Any, + ) -> None: + """Initialize the environment. + + Args: + env_id (str): Environment id. + num_envs (int, optional): Number of environments. Defaults to 1. + device (torch.device, optional): Device to store the data. Defaults to 'cpu'. 
+ + Keyword Args: + render_mode (str, optional): The render mode, ranging from ``human``, ``rgb_array``, ``rgb_array_list``. + Defaults to ``rgb_array``. + camera_name (str, optional): The camera name. + camera_id (int, optional): The camera id. + width (int, optional): The width of the rendered image. Defaults to 256. + height (int, optional): The height of the rendered image. Defaults to 256. + """ + super().__init__(env_id) + self._env_id = env_id + if num_envs == 1: + self._env = gymnasium.make(id=env_id, autoreset=False, **kwargs) + self._env_specific_setting() + assert isinstance(self._env.action_space, Box), 'Only support Box action space.' + assert isinstance( + self._env.observation_space, + Box, + ), 'Only support Box observation space.' + self._action_space = self._env.action_space + self._observation_space = self._env.observation_space + else: + raise NotImplementedError('Only support num_envs=1 now.') + self._device = torch.device(device) + + self._num_envs = num_envs + self._metadata = self._env.metadata + + def _env_specific_setting(self): + """Execute some specific setting for environments. + + Some algorithms based on control barrier functions have made fine-tuning adjustments to the environment. + We have organized these adjustments and encapsulated them in this function. + """ + if self._env_id == 'Pendulum-v1': + self._env.unwrapped.max_torque = 15. + self._env.unwrapped.max_speed = 60. + self._env.unwrapped.action_space = spaces.Box(low=-self._env.unwrapped.max_torque, high=self._env.unwrapped.max_torque, shape=(1,)) + high = np.array([1., 1., self._env.unwrapped.max_speed]) + self._env.unwrapped.observation_space = spaces.Box(low=-high, high=high) + self._env.dt = 0.05 + self._env.dynamics_mode = 'Pendulum' + + def step( + self, + action: torch.Tensor, + ) -> tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + dict[str, Any], + ]: + """Step the environment. + + .. 
note:: + + OmniSafe use auto reset wrapper to reset the environment when the episode is + terminated. So the ``obs`` will be the first observation of the next episode. + And the true ``final_observation`` in ``info`` will be stored in the ``final_observation`` key of ``info``. + + Args: + action (torch.Tensor): Action to take. + + Returns: + observation: Agent's observation of the current environment. + reward: Amount of reward returned after previous action. + cost: Amount of cost returned after previous action. + terminated: Whether the episode has ended. + truncated: Whether the episode has been truncated due to a time limit. + info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). + """ + obs, reward, terminated, truncated, info = self._env.step( + action.detach().cpu().numpy(), + ) + obs, reward, terminated, truncated = ( + torch.as_tensor(x, dtype=torch.float32, device=self._device) + for x in (obs, reward, terminated, truncated) + ) + cost = torch.abs(torch.atan2(obs[1], obs[0])).to(self._device) + + if 'final_observation' in info: + info['final_observation'] = np.array( + [ + array if array is not None else np.zeros(obs.shape[-1]) + for array in info['final_observation'] + ], + ) + info['final_observation'] = torch.as_tensor( + info['final_observation'], + dtype=torch.float32, + device=self._device, + ) + + return obs, reward, cost, terminated, truncated, info + + def reset( + self, + seed: int | None = None, + options: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, dict]: + """Reset the environment. + + Args: + seed (int, optional): The random seed. Defaults to None. + options (dict[str, Any], optional): The options for the environment. Defaults to None. + + Returns: + observation: Agent's observation of the current environment. + info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). 
+ """ + obs, info = self._env.reset(seed=seed, options=options) + if self._env_id == 'Pendulum-v1': + while (self._env.unwrapped.state[0] > 1.0 or self._env.unwrapped.state[0] < -1.0): + obs, info = self._env.reset(options=options) + return torch.as_tensor(obs, dtype=torch.float32, device=self._device), info + + def set_seed(self, seed: int) -> None: + """Set the seed for the environment. + + Args: + seed (int): Seed to set. + """ + self.reset(seed=seed) + + def sample_action(self) -> torch.Tensor: + """Sample a random action. + + Returns: + A random action. + """ + return torch.normal(torch.zeros(self.action_space.shape), torch.ones(self.action_space.shape)) + + def render(self) -> Any: + """Render the environment. + + Returns: + Rendered environment. + """ + return self._env.render() + + def close(self) -> None: + """Close the environment.""" + self._env.close() + + @property + def unwrapped(self): + return self._env.unwrapped \ No newline at end of file diff --git a/omnisafe/envs/robust_barrier_function_env.py b/omnisafe/envs/robust_barrier_function_env.py new file mode 100644 index 000000000..12e680b86 --- /dev/null +++ b/omnisafe/envs/robust_barrier_function_env.py @@ -0,0 +1,224 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Interface of control barrier function-based environments.""" + +from __future__ import annotations + +from typing import Any, ClassVar + +import numpy as np +import torch + +import gymnasium +from omnisafe.envs.core import CMDP, env_register +from omnisafe.typing import Box +from gymnasium import spaces +from omnisafe.envs.unicycle_env import UnicycleEnv + + +@env_register +class RobustBarrierFunctionEnv(CMDP): + """Interface of control barrier function-based environments. + + .. warning:: + Since environments based on control barrier functions require special judgment and control of environmental dynamics, + they do not support the use of vectorized environments for parallelization. + + Attributes: + need_auto_reset_wrapper (bool): Whether to use auto reset wrapper. + need_time_limit_wrapper (bool): Whether to use time limit wrapper. + """ + need_auto_reset_wrapper = True + need_time_limit_wrapper = False + _support_envs: ClassVar[list[str]] = [ + 'Unicycle', + 'Pendulum-v1', + ] + + def __init__( + self, + env_id: str, + num_envs: int = 1, + device: str = 'cpu', + **kwargs: Any, + ) -> None: + """Initialize the environment. + + Args: + env_id (str): Environment id. + num_envs (int, optional): Number of environments. Defaults to 1. + device (torch.device, optional): Device to store the data. Defaults to 'cpu'. + + Keyword Args: + render_mode (str, optional): The render mode, ranging from ``human``, ``rgb_array``, ``rgb_array_list``. + Defaults to ``rgb_array``. + camera_name (str, optional): The camera name. + camera_id (int, optional): The camera id. + width (int, optional): The width of the rendered image. Defaults to 256. + height (int, optional): The height of the rendered image. Defaults to 256. 
+ """ + super().__init__(env_id) + self._env_id = env_id + if num_envs == 1: + if self._env_id == 'Unicycle': + self._env = UnicycleEnv() + elif self._env_id == 'Pendulum-v1': + self._env = gymnasium.make(id=env_id, autoreset=False, **kwargs) + self._env_specific_setting() + else: + raise NotImplementedError('Only support Unicycle now.') + assert isinstance(self._env.action_space, Box), 'Only support Box action space.' + assert isinstance( + self._env.observation_space, + Box, + ), 'Only support Box observation space.' + self._action_space = self._env.action_space + self._observation_space = self._env.observation_space + else: + raise NotImplementedError('Only support num_envs=1 now.') + self._device = torch.device(device) + + self._num_envs = num_envs + self._metadata = self._env.metadata + + def _env_specific_setting(self): + """Execute some specific setting for environments. + + Some algorithms based on control barrier functions have made fine-tuning adjustments to the environment. + We have organized these adjustments and encapsulated them in this function. + """ + if self._env_id == 'Pendulum-v1': + self._env.unwrapped.max_torque = 15. + self._env.unwrapped.max_speed = 60. + self._env.unwrapped.action_space = spaces.Box(low=-self._env.unwrapped.max_torque, high=self._env.unwrapped.max_torque, shape=(1,)) + high = np.array([1., 1., self._env.unwrapped.max_speed]) + self._env.unwrapped.observation_space = spaces.Box(low=-high, high=high) + + def step( + self, + action: torch.Tensor, + ) -> tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + dict[str, Any], + ]: + """Step the environment. + + .. note:: + + OmniSafe use auto reset wrapper to reset the environment when the episode is + terminated. So the ``obs`` will be the first observation of the next episode. + And the true ``final_observation`` in ``info`` will be stored in the ``final_observation`` key of ``info``. + + Args: + action (torch.Tensor): Action to take. 
+ + Returns: + observation: Agent's observation of the current environment. + reward: Amount of reward returned after previous action. + cost: Amount of cost returned after previous action. + terminated: Whether the episode has ended. + truncated: Whether the episode has been truncated due to a time limit. + info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). + """ + if self._env_id == 'Unicycle': + obs, reward, cost, terminated, truncated, info = self._env.step( + action.detach().cpu().numpy(), + ) + obs, reward, cost, terminated, truncated = ( + torch.as_tensor(x, dtype=torch.float32, device=self._device) + for x in (obs, reward, cost, terminated, truncated) + ) + elif self._env_id == 'Pendulum-v1': + obs, reward, terminated, truncated, info = self._env.step( + action.detach().cpu().numpy(), + ) + obs, reward, terminated, truncated = ( + torch.as_tensor(x, dtype=torch.float32, device=self._device) + for x in (obs, reward, terminated, truncated) + ) + cost = torch.abs(torch.atan2(obs[1], obs[0])).to(self._device) + if 'final_observation' in info: + info['final_observation'] = np.array( + [ + array if array is not None else np.zeros(obs.shape[-1]) + for array in info['final_observation'] + ], + ) + info['final_observation'] = torch.as_tensor( + info['final_observation'], + dtype=torch.float32, + device=self._device, + ) + + return obs, reward, cost, terminated, truncated, info + + def reset( + self, + seed: int | None = None, + options: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, dict]: + """Reset the environment. + + Args: + seed (int, optional): The random seed. Defaults to None. + options (dict[str, Any], optional): The options for the environment. Defaults to None. + + Returns: + observation: Agent's observation of the current environment. + info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). 
+ """ + obs, info = self._env.reset(seed=seed, options=options) + if self._env_id == 'Pendulum-v1': + while (self._env.unwrapped.state[0] > 1.0 or self._env.unwrapped.state[0] < -1.0): + obs, info = self._env.reset(options=options) + return torch.as_tensor(obs, dtype=torch.float32, device=self._device), info + + def set_seed(self, seed: int) -> None: + """Set the seed for the environment. + + Args: + seed (int): Seed to set. + """ + self.reset(seed=seed) + + def sample_action(self) -> torch.Tensor: + """Sample a random action. + + Returns: + A random action. + """ + return torch.normal(torch.zeros(self.action_space.shape), torch.ones(self.action_space.shape)) + + def render(self) -> Any: + """Render the environment. + + Returns: + Rendered environment. + """ + return self._env.render() + + def close(self) -> None: + """Close the environment.""" + self._env.close() + + def __getattr__(self, name): + try: + return getattr(self._env, name) + except AttributeError: + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") diff --git a/omnisafe/envs/unicycle_env.py b/omnisafe/envs/unicycle_env.py new file mode 100644 index 000000000..fb16394a5 --- /dev/null +++ b/omnisafe/envs/unicycle_env.py @@ -0,0 +1,366 @@ +import numpy as np +import gymnasium as gym +from gymnasium import spaces +from collections.abc import Iterable + + +def to_pixel(meas_cm, shift=0): + + if isinstance(meas_cm, Iterable): + return 1.5 * 37.795 * meas_cm + np.array(shift) + + return 1.5 * 37.795 * meas_cm + shift + +class UnicycleEnv(gym.Env): + """Custom Environment that follows SafetyGym interface""" + + metadata = {'render.modes': ['human']} + + def __init__(self, obs_config='default'): + + super(UnicycleEnv, self).__init__() + + self.dynamics_mode = 'Unicycle' + # Define action and observation space + # They must be gym.spaces objects + # Example when using discrete actions: + self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,)) + self.safe_action_space = 
spaces.Box(low=-2.5, high=2.5, shape=(2,)) + self.observation_space = spaces.Box(low=-1e10, high=1e10, shape=(7,)) + self.bds = np.array([[-3., -3.], [3., 3.]]) + + self.dt = 0.02 + self.max_episode_steps = 1000 + self.reward_goal = 1.0 + self.goal_size = 0.3 + # Initialize Env + self.state = None + self.episode_step = 0 + self.initial_state = np.array([[-2.5, -2.5, 0.0], [-2.5, 2.5, 0.0], [-2.5, 0.0, 0.0], [2.5, -2.5, np.pi/2]]) + self.goal_pos = np.array([2.5, 2.5]) + self.rand_init = False # Random Initial State + + self.reset() + + # Get Dynamics + self.get_f, self.get_g = self._get_dynamics() + # Disturbance + self.disturb_mean = np.zeros((3,)) + self.disturb_covar = np.diag([0.005, 0.005, 0.05]) * 20 + + # Build Hazards + self.obs_config = obs_config + self.hazards = [] + if obs_config == 'default': # default + self.hazards.append({'type': 'circle', 'radius': 0.6, 'location': 1.5*np.array([0., 0.])}) + self.hazards.append({'type': 'circle', 'radius': 0.6, 'location': 1.5*np.array([-1., 1.])}) + self.hazards.append({'type': 'circle', 'radius': 0.6, 'location': 1.5*np.array([-1., -1.])}) + self.hazards.append({'type': 'circle', 'radius': 0.6, 'location': 1.5*np.array([1., -1.])}) + self.hazards.append({'type': 'circle', 'radius': 0.6, 'location': 1.5*np.array([1., 1.])}) + elif obs_config == 'test': + # self.build_hazards(obs_config) + self.hazards.append({'type': 'polygon', 'vertices': 0.6*np.array([[-1., -1.], [1., -1], [1., 1.], [-1., 1.]])}) + self.hazards[-1]['vertices'][:, 0] += 0.5 + self.hazards[-1]['vertices'][:, 1] -= 0.5 + self.hazards.append({'type': 'circle', 'radius': 0.6, 'location': 1.5*np.array([1., 1.])}) + self.hazards.append( + {'type': 'polygon', 'vertices': np.array([[0.9, 0.9], [2.1, 2.1], [2.1, 0.9]])}) + else: + n_hazards = 6 + hazard_radius = 0.6 + self.get_random_hazard_locations(n_hazards, hazard_radius) + + # Viewer + self.viewer = None + + + def step(self, action): + """Organize the observation to understand what's going on + + 
Parameters + ---------- + action : ndarray + Action that the agent takes in the environment + + Returns + ------- + new_obs : ndarray + The new observation with the following structure: + [pos_x, pos_y, cos(theta), sin(theta), xdir2goal, ydir2goal, dist2goal] + + """ + + action = np.clip(action, -1.0, 1.0) + state, reward, cost, terminated, truncated, info = self._step(action) + return self.get_obs(), reward, cost, terminated, truncated, info + + def _step(self, action): + """ + + Parameters + ---------- + action + + Returns + ------- + state : ndarray + New internal state of the agent. + reward : float + Reward collected during this transition. + terminated : bool + Whether the episode terminated. + info : dict + Additional info relevant to the environment. + """ + + # Start with our prior for continuous time system x' = f(x) + g(x)u + self.state += self.dt * (self.get_f(self.state) + self.get_g(self.state) @ action) + self.state -= self.dt * 0.1 * self.get_g(self.state) @ np.array([np.cos(self.state[2]), 0]) #* np.random.multivariate_normal(self.disturb_mean, self.disturb_covar, 1).squeeze() + + self.episode_step += 1 + + info = dict() + + dist_goal = self._goal_dist() + reward = (self.last_goal_dist - dist_goal) # -1e-3 * dist_goal + self.last_goal_dist = dist_goal + # Check if goal is met + terminated = False + if self.goal_met(): + info['goal_met'] = True + reward += self.reward_goal + terminated = True + truncated = self.episode_step >= self.max_episode_steps + + # Include constraint cost in reward (only during training, i.e. 
obs_config=='default') + if self.obs_config == 'default': + info['cost'] = 0 + for hazard in self.hazards: + if hazard['type'] == 'circle': # They should all be circles if 'default' + info['cost'] += 0.1 * (np.sum((self.state[:2] - hazard['location']) ** 2) < hazard['radius'] ** 2) + return self.state, reward, info['cost'], terminated, truncated, info + + def goal_met(self): + """Return true if the current goal is met this step + + Returns + ------- + goal_met : bool + True if the goal condition is met. + + """ + + return np.linalg.norm(self.state[:2] - self.goal_pos) <= self.goal_size + + def reset(self, seed=None, options=None): + """ Reset the state of the environment to an initial state. + + Returns + ------- + observation : ndarray + Next observation. + """ + + self.episode_step = 0 + + # Re-initialize state + if self.rand_init: + self.state = np.copy(self.initial_state[np.random.randint(self.initial_state.shape[0])]) + else: + self.state = np.copy(self.initial_state[0]) + + # Re-initialize last goal dist + self.last_goal_dist = self._goal_dist() + + return self.get_obs(), dict() + + def render(self, mode='human', close=False): + """Render the environment to the screen + + Parameters + ---------- + mode : str + close : bool + + Returns + ------- + + """ + + if mode != 'human' and mode != 'rgb_array': + rel_loc = self.goal_pos - self.state[:2] + theta_error = np.arctan2(rel_loc[1], rel_loc[0]) - self.state[2] + print('Ep_step = {}, \tState = {}, \tDist2Goal = {}, alignment_error = {}'.format(self.episode_step, self.state, self._goal_dist(), theta_error)) + + screen_width = 600 + screen_height = 400 + + if self.viewer is None: + from envs import pyglet_rendering + + self.viewer = pyglet_rendering.Viewer(screen_width, screen_height) + # Draw obstacles + obstacles = [] + for i in range(len(self.hazards)): + if self.hazards[i]['type'] == 'circle': + obstacles.append(pyglet_rendering.make_circle(radius=to_pixel(self.hazards[i]['radius'], shift=0), filled=True)) + 
obs_trans = pyglet_rendering.Transform(translation=(to_pixel(self.hazards[i]['location'][0], shift=screen_width/2), to_pixel(self.hazards[i]['location'][1], shift=screen_height/2))) + obstacles[i].set_color(1.0, 0.0, 0.0) + obstacles[i].add_attr(obs_trans) + elif self.hazards[i]['type'] == 'polygon': + obstacles.append(pyglet_rendering.make_polygon(to_pixel(self.hazards[i]['vertices'], shift=[screen_width/2, screen_height/2]), filled=True)) + self.viewer.add_geom(obstacles[i]) + + # Make Goal + goal = pyglet_rendering.make_circle(radius=to_pixel(0.1, shift=0), filled=True) + goal_trans = pyglet_rendering.Transform(translation=(to_pixel(self.goal_pos[0], shift=screen_width/2), to_pixel(self.goal_pos[1], shift=screen_height/2))) + goal.add_attr(goal_trans) + goal.set_color(0.0, 0.5, 0.0) + self.viewer.add_geom(goal) + + # Make Robot + self.robot = pyglet_rendering.make_circle(radius=to_pixel(0.1), filled=True) + self.robot_trans = pyglet_rendering.Transform(translation=(to_pixel(self.state[0], shift=screen_width/2), to_pixel(self.state[1], shift=screen_height/2))) + self.robot_trans.set_rotation(self.state[2]) + self.robot.add_attr(self.robot_trans) + self.robot.set_color(0.5, 0.5, 0.8) + self.viewer.add_geom(self.robot) + self.robot_orientation = pyglet_rendering.Line(start=(0.0, 0.0), end=(15.0, 0.0)) + self.robot_orientation.linewidth.stroke = 2 + self.robot_orientation.add_attr(self.robot_trans) + self.robot_orientation.set_color(0, 0, 0) + self.viewer.add_geom(self.robot_orientation) + + if self.state is None: + return None + + self.robot_trans.set_translation(to_pixel(self.state[0], shift=screen_width/2), to_pixel(self.state[1], shift=screen_height/2)) + self.robot_trans.set_rotation(self.state[2]) + + return self.viewer.render(return_rgb_array=mode == "rgb_array") + + def get_obs(self): + """Given the state, this function returns it to an observation akin to the one obtained by calling env.step + + Parameters + ---------- + + Returns + ------- + observation : 
ndarray + Observation: [pos_x, pos_y, cos(theta), sin(theta), xdir2goal, ydir2goal, exp(-dist2goal)] + """ + + rel_loc = self.goal_pos - self.state[:2] + goal_dist = np.linalg.norm(rel_loc) + goal_compass = self.obs_compass() # compass to the goal + + return np.array([self.state[0], self.state[1], np.cos(self.state[2]), np.sin(self.state[2]), goal_compass[0], goal_compass[1], np.exp(-goal_dist)]) + + def _get_dynamics(self): + """Get affine CBFs for a given environment. + + Parameters + ---------- + + Returns + ------- + get_f : callable + Drift dynamics of the continuous system x' = f(x) + g(x)u + get_g : callable + Control dynamics of the continuous system x' = f(x) + g(x)u + """ + + def get_f(state): + f_x = np.zeros(state.shape) + return f_x + + def get_g(state): + theta = state[2] + g_x = np.array([[np.cos(theta), 0], + [np.sin(theta), 0], + [ 0, 1.0]]) + return g_x + + return get_f, get_g + + def obs_compass(self): + """ + Return a robot-centric compass observation of a list of positions. + Compass is a normalized (unit-lenght) egocentric XY vector, + from the agent to the object. + This is equivalent to observing the egocentric XY angle to the target, + projected into the sin/cos space we use for joints. + (See comment on joint observation for why we do this.) 
+ """ + + # Get ego vector in world frame + vec = self.goal_pos - self.state[:2] + # Rotate into frame + R = np.array([[np.cos(self.state[2]), -np.sin(self.state[2])], [np.sin(self.state[2]), np.cos(self.state[2])]]) + vec = np.matmul(vec, R) + # Normalize + vec /= np.sqrt(np.sum(np.square(vec))) + 0.001 + return vec + + def _goal_dist(self): + return np.linalg.norm(self.goal_pos - self.state[:2]) + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None + + def get_random_hazard_locations(self, n_hazards: int, hazard_radius: float): + """ + + Parameters + ---------- + n_hazards : int + Number of hazards to create + hazard_radius : float + Radius of hazards + + Returns + ------- + hazards_locs : ndarray + Numpy array of shape (n_hazards, 2) containing xy locations of hazards. + """ + + # Create buffer with boundaries + buffered_bds = np.copy(self.bds) + buffered_bds[0] = buffered_bds[0] + hazard_radius + buffered_bds[1] -= hazard_radius + + hazards = [] + hazards_centers = np.zeros((n_hazards, 2)) + n = 0 # Number of hazards actually placed + for i in range(n_hazards): + successfully_placed = False + iter = 0 + hazard_type = np.random.randint(3) # 0-> Circle 1->Square 2->Triangle + radius = hazard_radius * (1-0.2*2.0*(np.random.random() - 0.5)) + while not successfully_placed and iter < 100: + hazards_centers[n] = (buffered_bds[1] - buffered_bds[0]) * np.random.random(2) + buffered_bds[0] + successfully_placed = np.all(np.linalg.norm(hazards_centers[:n] - hazards_centers[[n]], axis=1) > 3.5*hazard_radius) + successfully_placed = np.logical_and(successfully_placed, np.linalg.norm(self.goal_pos - hazards_centers[n]) > 2.0*hazard_radius) + successfully_placed = np.logical_and(successfully_placed, np.all(np.linalg.norm(self.initial_state[:, :2] - hazards_centers[[n]], axis=1) > 2.0*hazard_radius)) + iter += 1 + if not successfully_placed: + continue + if hazard_type == 0: # Circle + hazards.append({'type': 'circle', 'location': 
hazards_centers[n], 'radius': radius}) + elif hazard_type == 1: # Square + hazards.append({'type': 'polygon', 'vertices': np.array( + [[-radius, -radius], [-radius, radius], [radius, radius], [radius, -radius]])}) + hazards[-1]['vertices'] += hazards_centers[n] + else: # Triangle + hazards.append({'type': 'polygon', 'vertices': np.array( + [[-radius, -radius], [-radius, radius], [radius, radius], [radius, -radius]])}) + # Pick a vertex and delete it + idx = np.random.randint(4) + hazards[-1]['vertices'] = np.delete(hazards[-1]['vertices'], idx, axis=0) + hazards[-1]['vertices'] += hazards_centers[n] + n += 1 + + self.hazards = hazards diff --git a/omnisafe/models/actor/actor_builder.py b/omnisafe/models/actor/actor_builder.py index cd1a0df15..80c68e1be 100644 --- a/omnisafe/models/actor/actor_builder.py +++ b/omnisafe/models/actor/actor_builder.py @@ -21,6 +21,7 @@ from omnisafe.models.actor.mlp_actor import MLPActor from omnisafe.models.actor.perturbation_actor import PerturbationActor from omnisafe.models.actor.vae_actor import VAE +from omnisafe.models.actor.beta_learning_actor import BetaLearningActor from omnisafe.models.base import Actor from omnisafe.typing import Activation, ActorType, InitFunction, OmnisafeSpace @@ -114,6 +115,14 @@ def build_actor( activation=self._activation, weight_initialization_mode=self._weight_initialization_mode, ) + if actor_type == 'beta': + return BetaLearningActor( + self._obs_space, + self._act_space, + self._hidden_sizes, + activation=self._activation, + weight_initialization_mode=self._weight_initialization_mode, + ) raise NotImplementedError( f'Actor type {actor_type} is not implemented! 
' f'Available actor types are: gaussian_learning, gaussian_sac, mlp, vae, perturbation.', diff --git a/omnisafe/models/actor/beta_learning_actor.py b/omnisafe/models/actor/beta_learning_actor.py new file mode 100644 index 000000000..8f9675934 --- /dev/null +++ b/omnisafe/models/actor/beta_learning_actor.py @@ -0,0 +1,144 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementation of BetaLearningActor.""" + +from __future__ import annotations + +import torch +import torch.nn as nn +import numpy as np + +from torch.distributions import Distribution, Beta + +from omnisafe.models.actor.gaussian_actor import GaussianActor +from omnisafe.typing import Activation, InitFunction, OmnisafeSpace +from omnisafe.utils.model import build_mlp_network +from omnisafe.models.base import Actor + + +# pylint: disable-next=too-many-instance-attributes +class BetaLearningActor(Actor): + + + _current_dist: Beta + + def __init__( + self, + obs_space: OmnisafeSpace, + act_space: OmnisafeSpace, + hidden_sizes: list[int], + activation: Activation = 'relu', + weight_initialization_mode: InitFunction = 'kaiming_uniform', + ) -> None: + """Initialize an instance of :class:`GaussianLearningActor`.""" + super().__init__(obs_space, act_space, hidden_sizes, activation, weight_initialization_mode) + + self.mean: nn.Module = build_mlp_network( + 
sizes=[self._obs_dim, self._hidden_sizes[0], self._hidden_sizes[0]], + activation=activation, + output_activation='tanh', + weight_initialization_mode=weight_initialization_mode, + ) + + self.alpha_net: nn.Module = build_mlp_network( + sizes=[self._hidden_sizes[-1], self._act_dim], + activation='identity', + output_activation='softplus', + weight_initialization_mode=weight_initialization_mode, + ) + + self.beta_net: nn.Module = build_mlp_network( + sizes=[self._hidden_sizes[-1], self._act_dim], + activation='identity', + output_activation='softplus', + weight_initialization_mode=weight_initialization_mode, + ) + + def _distribution(self, obs: torch.Tensor) -> Beta: + """Get the distribution of the actor. + + .. warning:: + This method is not supposed to be called by users. You should call :meth:`forward` + instead. + + Args: + obs (torch.Tensor): Observation from environments. + + Returns: + The normal distribution of the mean and standard deviation from the actor. + """ + mean = self.mean(obs) + alphas = 1.0+self.alpha_net(mean) + betas = 1.0+self.beta_net(mean) + return Beta(alphas, betas) + + def predict(self, obs: torch.Tensor, deterministic: bool = False) -> torch.Tensor: + """Predict the action given observation. + + The predicted action depends on the ``deterministic`` flag. + + - If ``deterministic`` is ``True``, the predicted action is the mean of the distribution. + - If ``deterministic`` is ``False``, the predicted action is sampled from the distribution. + + Args: + obs (torch.Tensor): Observation from environments. + deterministic (bool, optional): Whether to use deterministic policy. Defaults to False. + + Returns: + The mean of the distribution if deterministic is True, otherwise the sampled action. + """ + self._current_dist = self._distribution(obs) + self._after_inference = True + if deterministic: + return self._current_dist.mean + return self._current_dist.rsample() + + def forward(self, obs: torch.Tensor) -> Distribution: + """Forward method. 
+ + Args: + obs (torch.Tensor): Observation from environments. + + Returns: + The current distribution. + """ + self._current_dist = self._distribution(obs) + self._after_inference = True + return self._current_dist + + def log_prob(self, act: torch.Tensor) -> torch.Tensor: + """Compute the log probability of the action given the current distribution. + + .. warning:: + You must call :meth:`forward` or :meth:`predict` before calling this method. + + Args: + act (torch.Tensor): Action from :meth:`predict` or :meth:`forward` . + + Returns: + Log probability of the action. + """ + assert self._after_inference, 'log_prob() should be called after predict() or forward()' + self._after_inference = False + return self._current_dist.log_prob(act).sum(axis=-1) + + @property + def std(self) -> float: + """Standard deviation of the distribution.""" + return 1.0 + + @std.setter + def std(self, std: float) -> None: + pass diff --git a/omnisafe/typing.py b/omnisafe/typing.py index bf73b558f..492067e72 100644 --- a/omnisafe/typing.py +++ b/omnisafe/typing.py @@ -39,7 +39,7 @@ AdvatageEstimator = Literal['gae', 'gae-rtg', 'vtrace', 'plain'] InitFunction = Literal['kaiming_uniform', 'xavier_normal', 'glorot', 'xavier_uniform', 'orthogonal'] CriticType = Literal['v', 'q'] -ActorType = Literal['gaussian_learning', 'gaussian_sac', 'mlp', 'vae', 'perturbation'] +ActorType = Literal['gaussian_learning', 'gaussian_sac', 'mlp', 'vae', 'perturbation', 'beta'] DEVICE_CPU = torch.device('cpu') From 025eea9b2c3df2ef6a5f4da4c62d7f4f7b646aba Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Tue, 30 Apr 2024 16:33:04 +0800 Subject: [PATCH 02/18] fix: fix test --- omnisafe/adapter/__init__.py | 2 +- omnisafe/adapter/barrier_function_adapter.py | 55 +- .../adapter/beta_barrier_function_adapter.py | 105 +-- .../offpolicy_barrier_function_adapter.py | 116 +++- .../robust_barrier_function_adapter.py | 126 ++-- omnisafe/algorithms/__init__.py | 8 +- 
omnisafe/algorithms/off_policy/__init__.py | 4 +- omnisafe/algorithms/off_policy/ddpg.py | 23 +- omnisafe/algorithms/off_policy/ddpg_cbf.py | 76 ++- omnisafe/algorithms/off_policy/sac_rcbf.py | 69 +- omnisafe/algorithms/on_policy/__init__.py | 4 +- .../on_policy/barrier_function/__init__.py | 2 +- .../on_policy/barrier_function/ppo_cbf.py | 10 +- .../on_policy/barrier_function/trpo_cbf.py | 49 +- omnisafe/algorithms/on_policy/base/ppo.py | 56 -- omnisafe/common/barrier_comp.py | 23 +- omnisafe/common/barrier_solver.py | 208 +++--- .../common/buffer/vector_onpolicy_buffer.py | 2 +- omnisafe/common/robust_barrier_solver.py | 513 +++++++-------- omnisafe/common/robust_gp_model.py | 601 ++++++++---------- omnisafe/common/utils.py | 218 +------ omnisafe/configs/off-policy/DDPGCBF.yaml | 6 +- omnisafe/configs/off-policy/SACRCBF.yaml | 24 +- omnisafe/configs/on-policy/TRPO.yaml | 2 +- omnisafe/configs/on-policy/TRPOCBF.yaml | 2 +- omnisafe/envs/__init__.py | 2 + omnisafe/envs/barrier_function_env.py | 64 +- omnisafe/envs/robust_barrier_function_env.py | 71 +-- omnisafe/envs/unicycle_env.py | 401 +++++++----- omnisafe/evaluator.py | 71 +++ omnisafe/models/actor/actor_builder.py | 2 +- omnisafe/models/actor/beta_learning_actor.py | 25 +- pyproject.toml | 5 + requirements.txt | 5 + 34 files changed, 1482 insertions(+), 1468 deletions(-) diff --git a/omnisafe/adapter/__init__.py b/omnisafe/adapter/__init__.py index 75d4539ba..02dab6709 100644 --- a/omnisafe/adapter/__init__.py +++ b/omnisafe/adapter/__init__.py @@ -14,6 +14,7 @@ # ============================================================================== """Adapter for the environment and the algorithm.""" +from omnisafe.adapter.beta_barrier_function_adapter import BetaBarrierFunctionAdapter from omnisafe.adapter.early_terminated_adapter import EarlyTerminatedAdapter from omnisafe.adapter.modelbased_adapter import ModelBasedAdapter from omnisafe.adapter.offline_adapter import OfflineAdapter @@ -22,4 +23,3 @@ from 
omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter from omnisafe.adapter.saute_adapter import SauteAdapter from omnisafe.adapter.simmer_adapter import SimmerAdapter -from omnisafe.adapter.beta_barrier_function_adapter import BetaBarrierFunctionAdapter diff --git a/omnisafe/adapter/barrier_function_adapter.py b/omnisafe/adapter/barrier_function_adapter.py index 47fa9b871..735ff690e 100644 --- a/omnisafe/adapter/barrier_function_adapter.py +++ b/omnisafe/adapter/barrier_function_adapter.py @@ -20,26 +20,20 @@ from rich.progress import track from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter +from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.common.barrier_solver import PendulumSolver from omnisafe.common.buffer import VectorOnPolicyBuffer from omnisafe.common.logger import Logger +from omnisafe.envs.wrapper import AutoReset, CostNormalize, RewardNormalize, TimeLimit, Unsqueeze from omnisafe.models.actor_critic.constraint_actor_critic import ConstraintActorCritic from omnisafe.utils.config import Config -from omnisafe.common.barrier_solver import PendulumSolver -from omnisafe.common.barrier_comp import BarrierCompensator -from omnisafe.envs.wrapper import ( - AutoReset, - CostNormalize, - RewardNormalize, - TimeLimit, - Unsqueeze, -) class BarrierFunctionAdapter(OnPolicyAdapter): """BarrierFunction Adapter for OmniSafe. - The BarrierFunction Adapter is used to establish the logic of interaction between agents and the - environment based on control barrier functions. Its key feature is the introduction of action + The BarrierFunction Adapter is used to establish the logic of interaction between agents and the + environment based on control barrier functions. Its key feature is the introduction of action compensators and barrier function solvers. Args: @@ -63,10 +57,10 @@ def _wrapper( cost_normalize: bool = True, ) -> None: """Wrapper the environment. - + .. 
warning:: - Since solving the optimization problem requires obtaining physical quantities with practical - significance from state observations, the Barrier Function Adapter does not support + Since solving the optimization problem requires obtaining physical quantities with practical + significance from state observations, the Barrier Function Adapter does not support normalization of observations. Args: @@ -89,15 +83,15 @@ def _wrapper( self._env = Unsqueeze(self._env, device=self._device) self._eval_env = Unsqueeze(self._eval_env, device=self._device) - def set_solver(self, solver: PendulumSolver): + def set_solver(self, solver: PendulumSolver) -> None: """Set the barrier function solver for Pendulum environment.""" self.solver: PendulumSolver = solver - - def set_compensator(self, compensator: BarrierCompensator): + + def set_compensator(self, compensator: BarrierCompensator) -> None: """Set the action compensator.""" self.compensator: BarrierCompensator = compensator - def reset_gp_model(self): + def reset_gp_model(self) -> None: """Reset the gaussian processing model of barrier function solver.""" self.solver.GP_model_prev = self.solver.GP_model.copy() self.solver.build_GP_model() @@ -111,10 +105,6 @@ def rollout( # pylint: disable=too-many-locals ) -> None: """Rollout the environment and store the data in the buffer. - .. warning:: - As OmniSafe uses :class:`AutoReset` wrapper, the environment will be reset automatically, - so the final observation will be stored in ``info['final_observation']``. - Args: steps_per_epoch (int): Number of steps per epoch. 
agent (ConstraintActorCritic): Constraint actor-critic, including actor , reward critic @@ -143,17 +133,23 @@ def rollout( # pylint: disable=too-many-locals approx_compensating_act = self.compensator(obs=obs) compensated_act_mean_raw = act_mean + approx_compensating_act - + if self.first_iter: - [f, g, x, std] = self.solver.get_GP_dynamics(obs, use_prev_model = False) + [f, g, x, std] = self.solver.get_GP_dynamics(obs, use_prev_model=False) else: - [f, g, x, std] = self.solver.get_GP_dynamics(obs, use_prev_model = True) - - compensating_act = self.solver.control_barrier(compensated_act_mean_raw, f, g, x, std) + [f, g, x, std] = self.solver.get_GP_dynamics(obs, use_prev_model=True) + + compensating_act = self.solver.control_barrier( + compensated_act_mean_raw, + f, + g, + x, + std, + ) compensated_act_mean = compensated_act_mean_raw + compensating_act final_act = torch.normal(compensated_act_mean, act_std) - + logp = agent.actor.log_prob(final_act).detach() path_obs.append(obs.detach().cpu().squeeze().numpy()) path_act.append(final_act.detach().cpu().squeeze().numpy()) @@ -207,7 +203,7 @@ def rollout( # pylint: disable=too-many-locals self._ep_len[idx] = 0.0 if step < 650: - self.solver.update_GP_dynamics(obs = path_obs, act = path_act) + self.solver.update_GP_dynamics(obs=path_obs, act=path_act) path_obs = [] path_act = [] @@ -216,4 +212,3 @@ def rollout( # pylint: disable=too-many-locals obs, _ = self._env.reset() buffer.finish_path(last_value_r, last_value_c, idx) self.first_iter = 0 - diff --git a/omnisafe/adapter/beta_barrier_function_adapter.py b/omnisafe/adapter/beta_barrier_function_adapter.py index f785c3062..ee8ccc298 100644 --- a/omnisafe/adapter/beta_barrier_function_adapter.py +++ b/omnisafe/adapter/beta_barrier_function_adapter.py @@ -16,84 +16,86 @@ from __future__ import annotations -import torch import numpy as np +import torch from rich.progress import track from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter from omnisafe.common.buffer 
import VectorOnPolicyBuffer from omnisafe.common.logger import Logger +from omnisafe.envs.wrapper import CostNormalize, RewardNormalize, Unsqueeze from omnisafe.models.actor_critic.constraint_actor_critic import ConstraintActorCritic from omnisafe.utils.config import Config -from omnisafe.common.barrier_solver import PendulumSolver -from omnisafe.common.barrier_comp import BarrierCompensator - -from omnisafe.envs.wrapper import ( - AutoReset, - CostNormalize, - RewardNormalize, - TimeLimit, - Unsqueeze, -) -def cbf(state=None, eta: float = 0.99): +def cbf(state: np.ndarray | None = None, eta: float = 0.99) -> tuple[np.ndarray, np.ndarray]: """ Calculates CBF constraint set at a given state. Default is the current state. """ - - state = state g = 9.8 m = 1 - l = 1 + length = 1 tau = 5e-2 theta_safety_bounds = [-1.0, 1.0] thetadot_safety_bounds = [-np.inf, np.inf] torque_bounds = [-15.0, 15.0] - if (eta>1-1e-3) or (eta<1e-5): - raise ValueError("eta should be inside (0, 1)") - c1 = ((3 * g)/(2 * l)) - c2 = (3 /(m * (l ** 2))) + if (eta > 1 - 1e-3) or (eta < 1e-5): + raise ValueError('eta should be inside (0, 1)') + c1 = (3 * g) / (2 * length) + c2 = 3 / (m * (length**2)) theta, thetadot = state[0], state[1] theta_min, theta_max = theta_safety_bounds[0], theta_safety_bounds[1] thetadot_min, thetadot_max = thetadot_safety_bounds[0], thetadot_safety_bounds[1] - u_min1 = (1/c2) * (((1 / (tau **2)) * (-eta * (theta - theta_min) - tau * thetadot)) - c1 * np.sin(theta) ) - u_max1 = (1/c2) * (((1 / (tau **2)) * ( eta * (theta_max - theta) - tau * thetadot)) - c1 * np.sin(theta) ) + u_min1 = (1 / c2) * ( + ((1 / (tau**2)) * (-eta * (theta - theta_min) - tau * thetadot)) - c1 * np.sin(theta) + ) + u_max1 = (1 / c2) * ( + ((1 / (tau**2)) * (eta * (theta_max - theta) - tau * thetadot)) - c1 * np.sin(theta) + ) - - u_min2 = (1/c2) * (((1 / (tau)) * (-eta * (thetadot - thetadot_min))) - c1 * np.sin(theta) ) - u_max2 = (1/c2) * (((1 / (tau)) * ( eta * (thetadot_max - thetadot))) - 
c1 * np.sin(theta) ) + u_min2 = (1 / c2) * (((1 / (tau)) * (-eta * (thetadot - thetadot_min))) - c1 * np.sin(theta)) + u_max2 = (1 / c2) * (((1 / (tau)) * (eta * (thetadot_max - thetadot))) - c1 * np.sin(theta)) u_min = max(u_min1, u_min2, torque_bounds[0]) u_max = min(u_max1, u_max2, torque_bounds[1]) - - u_min=torque_bounds[0] - u_max=torque_bounds[1] - if u_min>u_max: - raise ValueError("Infeasible") - else: - return [u_min, u_max] - -def vectorize_f(f): #--vipul :added action_dim - """ - Converts a function f defined on 1D numpy arrays and outputting pairs of - scalars into a vectorized function accepting batches of - torch tensorized arrays and output pairs of torch tensors. + + u_min = torque_bounds[0] + u_max = torque_bounds[1] + + return [u_min, u_max] + + +def vectorize_f(f: callable) -> callable: + """Converts a function `f` that operates on 1D numpy arrays and outputs pairs of scalars, + into a vectorized function that accepts batches of torch tensorized arrays and outputs + pairs of torch tensors. + + Args: + f (callable): A function that accepts 1D numpy arrays and returns a tuple (lower_bound, upper_bound), where both are scalars. + + Returns: + callable: A vectorized function that can process batches of torch tensors and return pairs of torch tensors. """ - def vectorized_f_(obs): #--vipul :added action_dim + def vectorized_f_(obs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """ + Inner function to process the torch tensor batch. + + Args: + obs (torch.Tensor): A batch of observations as torch tensors. + Returns: + tuple: Two torch tensors representing the lower and upper bounds for each observation in the batch. 
+ """ obs = obs.cpu().detach().numpy() - if len(obs.shape) == 1: # check to see if obs is a batch or single obs + if len(obs.shape) == 1: batch_size = 1 lbs, ubs = f(obs) - lbs=np.array(lbs) - ubs=np.array(ubs) - #lbs = -5 - #ubs = 5 + lbs = np.array(lbs) + ubs = np.array(ubs) else: batch_size = obs.shape[0] @@ -104,7 +106,7 @@ def vectorized_f_(obs): #--vipul :added action_dim lbs = torch.FloatTensor(lbs).reshape(batch_size, 1) ubs = torch.FloatTensor(ubs).reshape(batch_size, 1) - + return lbs, ubs return vectorized_f_ @@ -113,8 +115,8 @@ def vectorized_f_(obs): #--vipul :added action_dim class BetaBarrierFunctionAdapter(OnPolicyAdapter): """BarrierFunction Adapter for OmniSafe. - The BarrierFunction Adapter is used to establish the logic of interaction between agents and the - environment based on control barrier functions. Its key feature is the introduction of action + The BarrierFunction Adapter is used to establish the logic of interaction between agents and the + environment based on control barrier functions. Its key feature is the introduction of action compensators and barrier function solvers. Args: @@ -139,10 +141,10 @@ def _wrapper( cost_normalize: bool = True, ) -> None: """Wrapper the environment. - + .. warning:: - Since solving the optimization problem requires obtaining physical quantities with practical - significance from state observations, the Barrier Function Adapter does not support + Since solving the optimization problem requires obtaining physical quantities with practical + significance from state observations, the Barrier Function Adapter does not support normalization of observations. 
Args: @@ -190,10 +192,10 @@ def rollout( # pylint: disable=too-many-locals with torch.no_grad(): act, value_r, value_c, logp = agent.step(obs) lb, ub = self.constraint_fn(obs) - final_act = lb + (ub-lb)*act + final_act = lb + (ub - lb) * act next_obs, reward, cost, terminated, truncated, info = self.step(final_act) - + self._log_value(reward=reward, cost=cost, info=info) if self._cfgs.algo_cfgs.use_cost: @@ -242,4 +244,3 @@ def rollout( # pylint: disable=too-many-locals obs, _ = self._env.reset() buffer.finish_path(last_value_r, last_value_c, idx) self.first_iter = 0 - diff --git a/omnisafe/adapter/offpolicy_barrier_function_adapter.py b/omnisafe/adapter/offpolicy_barrier_function_adapter.py index b05e950cb..e1353884b 100644 --- a/omnisafe/adapter/offpolicy_barrier_function_adapter.py +++ b/omnisafe/adapter/offpolicy_barrier_function_adapter.py @@ -17,23 +17,17 @@ from __future__ import annotations import torch -import numpy as np +from sklearn.gaussian_process import GaussianProcessRegressor from omnisafe.adapter.offpolicy_adapter import OffPolicyAdapter +from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.common.barrier_solver import PendulumSolver from omnisafe.common.buffer import VectorOffPolicyBuffer from omnisafe.common.logger import Logger -from omnisafe.utils.config import Config -from omnisafe.common.barrier_solver import PendulumSolver -from omnisafe.common.robust_barrier_solver import CBFQPLayer -from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.envs.wrapper import CostNormalize, RewardNormalize, Unsqueeze from omnisafe.models.actor_critic.constraint_actor_q_critic import ConstraintActorQCritic -from omnisafe.common.robust_gp_model import DynamicsModel +from omnisafe.utils.config import Config -from omnisafe.envs.wrapper import ( - CostNormalize, - RewardNormalize, - Unsqueeze, -) class OffPolicyBarrierFunctionAdapter(OffPolicyAdapter): @@ -64,18 +58,58 @@ def _wrapper( self._env = Unsqueeze(self._env, 
device=self._device) self._eval_env = Unsqueeze(self._eval_env, device=self._device) - def set_solver(self, solver: PendulumSolver): + def eval_policy( # pylint: disable=too-many-locals + self, + episode: int, + agent: ConstraintActorQCritic, + logger: Logger, + ) -> None: + """Rollout the environment in an evaluation environment. + + Args: + episode (int): Number of episodes. + agent (ConstraintActorCritic): Agent. + logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``. + """ + for _ in range(episode): + ep_ret, ep_cost, ep_len = 0.0, 0.0, 0 + obs, _ = self._eval_env.reset() + obs = obs.to(self._device) + + done = False + while not done: + act = agent.step(obs, deterministic=True) + final_act = self.get_safe_action(obs=obs, act=act, is_eval=True) + obs, reward, cost, terminated, truncated, info = self._eval_env.step(final_act) + obs, reward, cost, terminated, truncated = ( + torch.as_tensor(x, dtype=torch.float32, device=self._device) + for x in (obs, reward, cost, terminated, truncated) + ) + ep_ret += info.get('original_reward', reward).cpu() + ep_cost += info.get('original_cost', cost).cpu() + ep_len += 1 + done = bool(terminated[0].item()) or bool(truncated[0].item()) + + logger.store( + { + 'Metrics/TestEpRet': ep_ret, + 'Metrics/TestEpCost': ep_cost, + 'Metrics/TestEpLen': ep_len, + }, + ) + + def set_solver(self, solver: PendulumSolver) -> None: """Set the barrier function solver for Pendulum environment.""" self.solver: PendulumSolver = solver - - def set_compensator(self, compensator: BarrierCompensator): + + def set_compensator(self, compensator: BarrierCompensator) -> None: """Set the action compensator.""" self.compensator: BarrierCompensator = compensator - def reset_gp_model(self): + def reset_gp_model(self) -> None: """Reset the gaussian processing model of barrier function solver.""" - self.solver.GP_model_prev = self.solver.GP_model.copy() - self.solver.build_GP_model() + self.solver.gp_model_prev = self.solver.gp_model.copy() + 
self.solver.build_gp_model() def rollout( # pylint: disable=too-many-locals self, @@ -87,18 +121,16 @@ def rollout( # pylint: disable=too-many-locals ) -> None: for _ in range(rollout_step): if use_rand_action: - act = torch.normal(torch.zeros(self.action_space.shape), torch.ones(self.action_space.shape)).unsqueeze(0) + act = (torch.rand(self.action_space.shape) * 2 - 1).unsqueeze(0).to(self._device) else: act = agent.actor.predict(self._current_obs, deterministic=False) - + final_act = self.get_safe_action(obs=self._current_obs, act=act) self.episode_rollout['obs'].append(self._current_obs) self.episode_rollout['final_act'].append(final_act) next_obs, reward, cost, terminated, truncated, info = self.step(final_act) - logger.store({'Metrics/angle': cost}) - self._log_value(reward=reward, cost=cost, info=info) buffer.store( @@ -115,18 +147,21 @@ def rollout( # pylint: disable=too-many-locals if done: self._log_metrics(logger, idx) compensator_loss = self.compensator.train( - torch.cat(self.episode_rollout['obs']), - torch.cat(self.episode_rollout['approx_compensating_act']), + torch.cat(self.episode_rollout['obs']), + torch.cat(self.episode_rollout['approx_compensating_act']), torch.cat(self.episode_rollout['compensating_act']), - ) + ) logger.store({'Value/Loss_compensator': compensator_loss.item()}) - self.solver.update_GP_dynamics(obs=torch.cat(self.episode_rollout['obs']), act=torch.cat(self.episode_rollout['final_act'])) - + self.solver.update_gp_dynamics( + obs=torch.cat(self.episode_rollout['obs']), + act=torch.cat(self.episode_rollout['final_act']), + ) + self.episode_rollout['obs'] = [] self.episode_rollout['final_act'] = [] self.episode_rollout['approx_compensating_act'] = [] self.episode_rollout['compensating_act'] = [] - + self._reset_log(idx) self._current_obs, _ = self._env.reset() self.first_iter = 0 @@ -134,18 +169,29 @@ def rollout( # pylint: disable=too-many-locals self.reset_gp_model() @torch.no_grad - def get_safe_action(self, obs, act): + def 
get_safe_action( + self, + obs: torch.Tensor, + act: torch.Tensor, + is_eval: bool = False, + ) -> torch.Tensor: approx_compensating_act = self.compensator(obs=self._current_obs) compensated_act_mean_raw = act + approx_compensating_act - + if self.first_iter: - [f, g, x, std] = self.solver.get_GP_dynamics(obs, use_prev_model = False) + [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=False) else: - [f, g, x, std] = self.solver.get_GP_dynamics(obs, use_prev_model = True) - + [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=True) + compensating_act = self.solver.control_barrier(compensated_act_mean_raw, f, g, x, std) safe_act = compensated_act_mean_raw + compensating_act - self.episode_rollout['compensating_act'].append(compensating_act) - self.episode_rollout['approx_compensating_act'].append(approx_compensating_act) - return safe_act \ No newline at end of file + if not is_eval: + self.episode_rollout['compensating_act'].append(compensating_act) + self.episode_rollout['approx_compensating_act'].append(approx_compensating_act) + + return safe_act + + @property + def gp_models(self) -> list[GaussianProcessRegressor]: + return self.solver.gp_models diff --git a/omnisafe/adapter/robust_barrier_function_adapter.py b/omnisafe/adapter/robust_barrier_function_adapter.py index f58f1e176..843676c7f 100644 --- a/omnisafe/adapter/robust_barrier_function_adapter.py +++ b/omnisafe/adapter/robust_barrier_function_adapter.py @@ -16,25 +16,20 @@ from __future__ import annotations +from typing import Any + import torch -import numpy as np from omnisafe.adapter.offpolicy_adapter import OffPolicyAdapter from omnisafe.common.buffer import VectorOffPolicyBuffer from omnisafe.common.logger import Logger -from omnisafe.utils.config import Config from omnisafe.common.robust_barrier_solver import CBFQPLayer -from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.common.robust_gp_model import DynamicsModel +from omnisafe.envs.wrapper import 
CostNormalize, RewardNormalize, Unsqueeze from omnisafe.models.actor_critic.constraint_actor_q_critic import ConstraintActorQCritic from omnisafe.typing import OmnisafeSpace -from omnisafe.common.robust_gp_model import DynamicsModel - +from omnisafe.utils.config import Config -from omnisafe.envs.wrapper import ( - CostNormalize, - RewardNormalize, - Unsqueeze, -) class RobustBarrierFunctionAdapter(OffPolicyAdapter): @@ -53,10 +48,10 @@ def _wrapper( cost_normalize: bool = True, ) -> None: """Wrapper the environment. - + .. warning:: - Since solving the optimization problem requires obtaining physical quantities with practical - significance from state observations, the Barrier Function Adapter does not support + Since solving the optimization problem requires obtaining physical quantities with practical + significance from state observations, the Barrier Function Adapter does not support normalization of observations. Args: @@ -72,19 +67,56 @@ def _wrapper( if self._env.num_envs == 1: self._env = Unsqueeze(self._env, device=self._device) self._eval_env = Unsqueeze(self._eval_env, device=self._device) - # self._env = ActionScale(self._env, low=-1.0, high=1.0, device=self._device) - # self._eval_env = ActionScale(self._eval_env, low=-1.0, high=1.0, device=self._device) - - def set_solver(self, solver: CBFQPLayer): + + def set_solver(self, solver: CBFQPLayer) -> None: """Set the barrier function solver for Pendulum environment.""" self.solver: CBFQPLayer = solver self.solver.env = self._env - def set_dynamics_model(self, dynamics_model: DynamicsModel): + def set_dynamics_model(self, dynamics_model: DynamicsModel) -> None: """Set the dynamics model.""" self.dynamics_model = dynamics_model self.dynamics_model.env = self._env + def eval_policy( # pylint: disable=too-many-locals + self, + episode: int, + agent: ConstraintActorQCritic, + logger: Logger, + ) -> None: + """Rollout the environment with deterministic agent action. + + Args: + episode (int): Number of episodes. 
+ agent (ConstraintActorCritic): Agent. + logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``. + """ + for _ in range(episode): + ep_ret, ep_cost, ep_len = 0.0, 0.0, 0 + obs, _ = self._eval_env.reset() + obs = obs.to(self._device) + + done = False + while not done: + act = agent.step(obs, deterministic=True) + obs, reward, cost, terminated, truncated, info = self._eval_env.step(act) + obs, reward, cost, terminated, truncated = ( + torch.as_tensor(x, dtype=torch.float32, device=self._device) + for x in (obs, reward, cost, terminated, truncated) + ) + ep_ret += info.get('original_reward', reward).cpu() + ep_cost += info.get('original_cost', cost).cpu() + ep_len += 1 + done = bool(terminated[0].item()) or bool(truncated[0].item()) + + logger.store( + { + 'Metrics/TestEpRet': ep_ret, + 'Metrics/TestEpCost': ep_cost, + 'Metrics/TestEpLen': ep_len, + }, + ) + def rollout( # pylint: disable=too-many-locals self, rollout_step: int, @@ -108,14 +140,15 @@ def rollout( # pylint: disable=too-many-locals use_rand_action (bool): Whether to use random action. 
""" for _ in range(rollout_step): - state = self.dynamics_model.get_state(self._current_obs) # 动态模型将观测转换为状态,状态和观测之间有一个互逆的转换 + state = self.dynamics_model.get_state(self._current_obs) self._current_steps += 1 if use_rand_action: - act = torch.normal(torch.zeros(self.action_space.shape), torch.ones(self.action_space.shape)).unsqueeze(0).to(self._device) + act = (torch.rand(self.action_space.shape) * 2 - 1).unsqueeze(0).to(self._device) else: act = agent.step(self._current_obs, deterministic=False) final_act = self.get_safe_action(obs=self._current_obs, act=act) + next_obs, reward, cost, terminated, truncated, info = self.step(final_act) self._log_value(reward=reward, cost=cost, info=info) @@ -127,11 +160,18 @@ def rollout( # pylint: disable=too-many-locals done=torch.logical_and(terminated, torch.logical_xor(terminated, truncated)), next_obs=next_obs, ) - - if self._ep_len[0] % 2 == 0 and self._num_episodes < self._cfgs.dynamics_model_cfgs.gp_max_episodes: + + if ( + self._ep_len[0] % 2 == 0 + and self._num_episodes < self._cfgs.dynamics_model_cfgs.gp_max_episodes + ): next_state = self.dynamics_model.get_state(next_obs) - self.dynamics_model.append_transition(state.cpu().detach().numpy(), final_act.cpu().detach().numpy(), next_state.cpu().detach().numpy(), t_batch=np.array([self._ep_len[0]*self._env.dt])) - + self.dynamics_model.append_transition( + state.cpu().detach().numpy(), + final_act.cpu().detach().numpy(), + next_state.cpu().detach().numpy(), + ) + self._current_obs = next_obs for idx, done in enumerate(torch.logical_or(terminated, truncated)): if done: @@ -139,36 +179,24 @@ def rollout( # pylint: disable=too-many-locals self._reset_log(idx) self._num_episodes += 1 self._current_obs, _ = self._env.reset() - + @property def safe_action_space(self) -> OmnisafeSpace: if hasattr(self._env, 'safe_action_space'): return self._env.safe_action_space - else: - return self._env.action_space - - def get_safe_action(self, obs, act, modular=False, cbf_info_batch=None): - 
"""Given a nominal action, returns a minimally-altered safe action to take. - - Parameters - ---------- - obs : torch.tensor - act : torch.tensor - dynamics_model : DynamicsModel - - Returns - ------- - safe_act : torch.tensor - Safe actions to be taken (cbf_action + action). - """ + return self._env.action_space + + def get_safe_action(self, obs: torch.Tensor, act: torch.Tensor) -> torch.Tensor: + state_batch = self.dynamics_model.get_state(obs) mean_pred_batch, sigma_pred_batch = self.dynamics_model.predict_disturbance(state_batch) - safe_act = self.solver.get_safe_action(state_batch, act, mean_pred_batch, sigma_pred_batch, modular=modular, cbf_info_batch=cbf_info_batch) - return safe_act + return self.solver.get_safe_action( + state_batch, + act, + mean_pred_batch, + sigma_pred_batch, + ) - def __getattr__(self, name): - try: - return getattr(self._env, name) - except AttributeError: - raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") \ No newline at end of file + def __getattr__(self, name: str) -> Any: + return getattr(self._env, name) diff --git a/omnisafe/algorithms/__init__.py b/omnisafe/algorithms/__init__.py index f25928ad2..da82ecbea 100644 --- a/omnisafe/algorithms/__init__.py +++ b/omnisafe/algorithms/__init__.py @@ -27,16 +27,16 @@ from omnisafe.algorithms.off_policy import ( CRABS, DDPG, + DDPGCBF, DDPGPID, SAC, SACPID, + SACRCBF, TD3, TD3PID, DDPGLag, SACLag, TD3Lag, - SACRCBF, - DDPGCBF, ) # Offline Safe @@ -53,10 +53,12 @@ PPO, RCPO, TRPO, + TRPOCBF, TRPOPID, NaturalPG, OnCRPO, PolicyGradient, + PPOBetaCBF, PPOEarlyTerminated, PPOLag, PPOSaute, @@ -65,8 +67,6 @@ TRPOLag, TRPOSaute, TRPOSimmerPID, - TRPOCBF, - PPOBetaCBF, ) diff --git a/omnisafe/algorithms/off_policy/__init__.py b/omnisafe/algorithms/off_policy/__init__.py index e87bd82f2..5a297c49f 100644 --- a/omnisafe/algorithms/off_policy/__init__.py +++ b/omnisafe/algorithms/off_policy/__init__.py @@ -16,16 +16,16 @@ from omnisafe.algorithms.off_policy.crabs 
import CRABS from omnisafe.algorithms.off_policy.ddpg import DDPG +from omnisafe.algorithms.off_policy.ddpg_cbf import DDPGCBF from omnisafe.algorithms.off_policy.ddpg_lag import DDPGLag from omnisafe.algorithms.off_policy.ddpg_pid import DDPGPID from omnisafe.algorithms.off_policy.sac import SAC from omnisafe.algorithms.off_policy.sac_lag import SACLag from omnisafe.algorithms.off_policy.sac_pid import SACPID +from omnisafe.algorithms.off_policy.sac_rcbf import SACRCBF from omnisafe.algorithms.off_policy.td3 import TD3 from omnisafe.algorithms.off_policy.td3_lag import TD3Lag from omnisafe.algorithms.off_policy.td3_pid import TD3PID -from omnisafe.algorithms.off_policy.sac_rcbf import SACRCBF -from omnisafe.algorithms.off_policy.ddpg_cbf import DDPGCBF __all__ = ['DDPG', 'TD3', 'SAC', 'DDPGLag', 'TD3Lag', 'SACLag', 'DDPGPID', 'TD3PID', 'SACPID', 'SACRCBF', 'DDPGCBF', 'CRABS'] diff --git a/omnisafe/algorithms/off_policy/ddpg.py b/omnisafe/algorithms/off_policy/ddpg.py index 517d8c0be..f0c633220 100644 --- a/omnisafe/algorithms/off_policy/ddpg.py +++ b/omnisafe/algorithms/off_policy/ddpg.py @@ -188,13 +188,7 @@ def _init_log(self) -> None: config=self._cfgs, ) - what_to_save: dict[str, Any] = {} - what_to_save['pi'] = self._actor_critic.actor - if self._cfgs.algo_cfgs.obs_normalize: - obs_normalizer = self._env.save()['obs_normalizer'] - what_to_save['obs_normalizer'] = obs_normalizer - - self._logger.setup_torch_saver(what_to_save) + self._log_what_to_save() self._logger.torch_save() self._logger.register_key( @@ -338,6 +332,7 @@ def learn(self) -> tuple[float, float, float]: # save model to disk if (epoch + 1) % self._cfgs.logger_cfgs.save_model_freq == 0: self._logger.torch_save() + self._specific_save() ep_ret = self._logger.get_stats('Metrics/EpRet')[0] ep_cost = self._logger.get_stats('Metrics/EpCost')[0] @@ -562,3 +557,17 @@ def _log_when_not_update(self) -> None: 'Value/cost_critic': 0.0, }, ) + + def _log_what_to_save(self) -> dict[str, Any]: + """Define 
what need to be saved below.""" + what_to_save: dict[str, Any] = {} + + what_to_save['pi'] = self._actor_critic.actor + if self._cfgs.algo_cfgs.obs_normalize: + obs_normalizer = self._env.save()['obs_normalizer'] + what_to_save['obs_normalizer'] = obs_normalizer + + self._logger.setup_torch_saver(what_to_save) + + def _specific_save(self) -> None: + """Save some algorithms specific models per epoch.""" diff --git a/omnisafe/algorithms/off_policy/ddpg_cbf.py b/omnisafe/algorithms/off_policy/ddpg_cbf.py index 12692db67..ad1306d5b 100644 --- a/omnisafe/algorithms/off_policy/ddpg_cbf.py +++ b/omnisafe/algorithms/off_policy/ddpg_cbf.py @@ -14,13 +14,21 @@ # ============================================================================== """Implementation of the DDPG algorithm with Control Barrier Function.""" + +from __future__ import annotations + +import os + +import joblib import torch +from omnisafe.adapter.offpolicy_barrier_function_adapter import OffPolicyBarrierFunctionAdapter from omnisafe.algorithms import registry from omnisafe.algorithms.off_policy.ddpg import DDPG -from omnisafe.common.barrier_solver import PendulumSolver -from omnisafe.adapter.offpolicy_barrier_function_adapter import OffPolicyBarrierFunctionAdapter from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.common.barrier_solver import PendulumSolver +from omnisafe.typing import Any +from omnisafe.utils.distributed import get_rank @registry.register @@ -35,7 +43,7 @@ class DDPGCBF(DDPG): """ def _init_env(self) -> None: - self._env: OffPolicyBarrierFunctionAdapter=OffPolicyBarrierFunctionAdapter( + self._env: OffPolicyBarrierFunctionAdapter = OffPolicyBarrierFunctionAdapter( self._env_id, self._cfgs.train_cfgs.vector_env_nums, self._seed, @@ -46,11 +54,11 @@ def _init_env(self) -> None: obs_dim=self._env.observation_space.shape[0], act_dim=self._env.action_space.shape[0], cfgs=self._cfgs.compensator_cfgs, - ) - + ).to(self._device) + 
self._env.set_compensator(compensator=compensator) self._env.set_solver(solver=solver) - + assert ( self._cfgs.algo_cfgs.steps_per_epoch % self._cfgs.train_cfgs.vector_env_nums == 0 ), 'The number of steps per epoch is not divisible by the number of environments.' @@ -58,26 +66,34 @@ def _init_env(self) -> None: assert ( int(self._cfgs.train_cfgs.total_steps) % self._cfgs.algo_cfgs.steps_per_epoch == 0 ), 'The total number of steps is not divisible by the number of steps per epoch.' - self._epochs: int=int( + self._epochs: int = int( self._cfgs.train_cfgs.total_steps // self._cfgs.algo_cfgs.steps_per_epoch, ) - self._epoch: int=0 - self._steps_per_epoch: int=( + self._epoch: int = 0 + self._steps_per_epoch: int = ( self._cfgs.algo_cfgs.steps_per_epoch // self._cfgs.train_cfgs.vector_env_nums ) - self._update_cycle: int=self._cfgs.algo_cfgs.update_cycle + self._update_cycle: int = self._cfgs.algo_cfgs.update_cycle assert ( self._steps_per_epoch % self._update_cycle == 0 ), 'The number of steps per epoch is not divisible by the number of steps per sample.' - self._samples_per_epoch: int=self._steps_per_epoch // self._update_cycle - self._update_count: int=0 - + self._samples_per_epoch: int = self._steps_per_epoch // self._update_cycle + self._update_count: int = 0 + def _init(self) -> None: super()._init() - self._buf.add_field(name='approx_compensating_act', shape=self._env.action_space.shape, dtype=torch.float32) - self._buf.add_field(name='compensating_act', shape=self._env.action_space.shape, dtype=torch.float32) - + self._buf.add_field( + name='approx_compensating_act', + shape=self._env.action_space.shape, + dtype=torch.float32, + ) + self._buf.add_field( + name='compensating_act', + shape=self._env.action_space.shape, + dtype=torch.float32, + ) + def _init_log(self) -> None: # """Log the DDPGRCBF specific information. 
@@ -88,6 +104,28 @@ def _init_log(self) -> None: # +----------------------------+--------------------------+ # """ super()._init_log() - if self._cfgs.env_id == 'Pendulum-v1': - self._logger.register_key('Metrics/angle', min_and_max=True) - self._logger.register_key('Value/Loss_compensator') \ No newline at end of file + self._logger.register_key('Value/Loss_compensator') + + def _specific_save(self) -> None: + """Save some algorithms specific models per epoch.""" + super()._specific_save() + if get_rank() == 0: + path = os.path.join( + self._logger.log_dir, + 'gp_model_save', + f'gaussian_process_regressor_{self._logger.current_epoch}.pkl', + ) + os.makedirs(os.path.dirname(path), exist_ok=True) + joblib.dump(self._env.gp_models, path) + + def _log_what_to_save(self) -> dict[str, Any]: + """Define what need to be saved below.""" + what_to_save: dict[str, Any] = {} + + what_to_save['pi'] = self._actor_critic.actor + what_to_save['compensator'] = self._env.compensator + if self._cfgs.algo_cfgs.obs_normalize: + obs_normalizer = self._env.save()['obs_normalizer'] + what_to_save['obs_normalizer'] = obs_normalizer + + self._logger.setup_torch_saver(what_to_save) diff --git a/omnisafe/algorithms/off_policy/sac_rcbf.py b/omnisafe/algorithms/off_policy/sac_rcbf.py index e980025c3..9fbd20a39 100644 --- a/omnisafe/algorithms/off_policy/sac_rcbf.py +++ b/omnisafe/algorithms/off_policy/sac_rcbf.py @@ -15,16 +15,20 @@ """Implementation of the Soft Actor-Critic algorithm with Robust Control Barrier Function.""" +from __future__ import annotations + +import os + import torch from torch import nn from torch.nn.utils.clip_grad import clip_grad_norm_ +from omnisafe.adapter.robust_barrier_function_adapter import RobustBarrierFunctionAdapter from omnisafe.algorithms import registry from omnisafe.algorithms.off_policy.sac import SAC from omnisafe.common.robust_barrier_solver import CBFQPLayer -from omnisafe.adapter.robust_barrier_function_adapter import RobustBarrierFunctionAdapter 
-from omnisafe.common.barrier_comp import BarrierCompensator from omnisafe.common.robust_gp_model import DynamicsModel +from omnisafe.utils.distributed import get_rank @registry.register @@ -39,24 +43,24 @@ class SACRCBF(SAC): """ def _init_env(self) -> None: - self._env: RobustBarrierFunctionAdapter=RobustBarrierFunctionAdapter( + self._env: RobustBarrierFunctionAdapter = RobustBarrierFunctionAdapter( self._env_id, self._cfgs.train_cfgs.vector_env_nums, self._seed, self._cfgs, ) - solver=CBFQPLayer( + solver = CBFQPLayer( env=self._env, device=self._cfgs.train_cfgs.device, gamma_b=self._cfgs.cbf_cfgs.gamma_b, k_d=self._cfgs.cbf_cfgs.k_d, l_p=self._cfgs.cbf_cfgs.l_p, ) - dynamics_model=DynamicsModel(env=self._env) - + dynamics_model = DynamicsModel(env=self._env) + self._env.set_dynamics_model(dynamics_model=dynamics_model) self._env.set_solver(solver=solver) - + assert ( self._cfgs.algo_cfgs.steps_per_epoch % self._cfgs.train_cfgs.vector_env_nums == 0 ), 'The number of steps per epoch is not divisible by the number of environments.' @@ -64,33 +68,20 @@ def _init_env(self) -> None: assert ( int(self._cfgs.train_cfgs.total_steps) % self._cfgs.algo_cfgs.steps_per_epoch == 0 ), 'The total number of steps is not divisible by the number of steps per epoch.' - self._epochs: int=int( + self._epochs: int = int( self._cfgs.train_cfgs.total_steps // self._cfgs.algo_cfgs.steps_per_epoch, ) - self._epoch: int=0 - self._steps_per_epoch: int=( + self._epoch: int = 0 + self._steps_per_epoch: int = ( self._cfgs.algo_cfgs.steps_per_epoch // self._cfgs.train_cfgs.vector_env_nums ) - self._update_cycle: int=self._cfgs.algo_cfgs.update_cycle + self._update_cycle: int = self._cfgs.algo_cfgs.update_cycle assert ( self._steps_per_epoch % self._update_cycle == 0 ), 'The number of steps per epoch is not divisible by the number of steps per sample.' 
- self._samples_per_epoch: int=self._steps_per_epoch // self._update_cycle - self._update_count: int=0 - - def _init_log(self) -> None: - # """Log the SACRCBF specific information. - - # +----------------------------+--------------------------+ - # | Things to log | Description | - # +============================+==========================+ - # | Metrics/LagrangeMultiplier | The Lagrange multiplier. | - # +----------------------------+--------------------------+ - # """ - super()._init_log() - if self._cfgs.env_id == 'Pendulum-v1': - self._logger.register_key('Metrics/angle', min_and_max=True) + self._samples_per_epoch: int = self._steps_per_epoch // self._update_cycle + self._update_count: int = 0 def _update_actor( self, @@ -163,7 +154,7 @@ def _update_reward_critic( 'Value/reward_critic': q1_value_r.mean().item(), }, ) - + def _loss_pi( self, obs: torch.Tensor, @@ -172,4 +163,26 @@ def _loss_pi( action = self._env.get_safe_action(obs, action) log_prob = self._actor_critic.actor.log_prob(action) q1_value_r, q2_value_r = self._actor_critic.reward_critic(obs, action) - return (self._alpha * log_prob - torch.min(q1_value_r, q2_value_r)).mean() \ No newline at end of file + return (self._alpha * log_prob - torch.min(q1_value_r, q2_value_r)).mean() + + def _specific_save(self) -> None: + """Save some algorithms specific models per epoch.""" + super()._specific_save() + if get_rank() == 0: + path = os.path.join(self._logger.log_dir, 'gp_model_save') + os.makedirs(path, exist_ok=True) + train_x = self._env.dynamics_model.train_x + train_y = self._env.dynamics_model.train_y + disturb_estimators = self._env.dynamics_model.disturb_estimators + weights = [] + for i in range(len(disturb_estimators)): + weights.append(disturb_estimators[i].model.state_dict()) + torch.save(weights, os.path.join(path, f'gp_models_{self._logger.current_epoch}.pkl')) + torch.save( + train_x, + os.path.join(path, f'gp_models_train_x_{self._logger.current_epoch}.pkl'), + ) + torch.save( + train_y, 
+ os.path.join(path, f'gp_models_train_y_{self._logger.current_epoch}.pkl'), + ) diff --git a/omnisafe/algorithms/on_policy/__init__.py b/omnisafe/algorithms/on_policy/__init__.py index 06932a307..8351ecf2d 100644 --- a/omnisafe/algorithms/on_policy/__init__.py +++ b/omnisafe/algorithms/on_policy/__init__.py @@ -15,6 +15,7 @@ """On-policy algorithms.""" from omnisafe.algorithms.on_policy import ( + barrier_function, base, early_terminated, first_order, @@ -25,8 +26,8 @@ saute, second_order, simmer, - barrier_function, ) +from omnisafe.algorithms.on_policy.barrier_function import TRPOCBF, PPOBetaCBF from omnisafe.algorithms.on_policy.base import PPO, TRPO, NaturalPG, PolicyGradient from omnisafe.algorithms.on_policy.early_terminated import PPOEarlyTerminated, TRPOEarlyTerminated from omnisafe.algorithms.on_policy.first_order import CUP, FOCOPS @@ -37,7 +38,6 @@ from omnisafe.algorithms.on_policy.saute import PPOSaute, TRPOSaute from omnisafe.algorithms.on_policy.second_order import CPO, PCPO from omnisafe.algorithms.on_policy.simmer import PPOSimmerPID, TRPOSimmerPID -from omnisafe.algorithms.on_policy.barrier_function import TRPOCBF, PPOBetaCBF __all__ = [ diff --git a/omnisafe/algorithms/on_policy/barrier_function/__init__.py b/omnisafe/algorithms/on_policy/barrier_function/__init__.py index 273ca2831..dacdc3c4d 100644 --- a/omnisafe/algorithms/on_policy/barrier_function/__init__.py +++ b/omnisafe/algorithms/on_policy/barrier_function/__init__.py @@ -14,8 +14,8 @@ # ============================================================================== """Control Barrier Function Safe Reinforcement Learning algorithms.""" -from omnisafe.algorithms.on_policy.barrier_function.trpo_cbf import TRPOCBF from omnisafe.algorithms.on_policy.barrier_function.ppo_cbf import PPOBetaCBF +from omnisafe.algorithms.on_policy.barrier_function.trpo_cbf import TRPOCBF __all__ = [ diff --git a/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py 
b/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py index e7711ed3c..24b27d939 100644 --- a/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py +++ b/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py @@ -26,10 +26,9 @@ @registry.register class PPOBetaCBF(PPO): - + def _init_log(self) -> None: super()._init_log() - self._logger.register_key('Metrics/angle', min_and_max=True) self._logger.register_key('Value/Loss_compensator') def _init_env(self) -> None: @@ -48,10 +47,6 @@ def _init_env(self) -> None: // self._cfgs.train_cfgs.vector_env_nums ) - def _init_log(self) -> None: - super()._init_log() - self._logger.register_key('Metrics/angle', min_and_max=True) - def _loss_pi( self, obs: torch.Tensor, @@ -85,7 +80,6 @@ def _loss_pi( """ distribution = self._actor_critic.actor(obs) logp_ = self._actor_critic.actor.log_prob(act) - std = self._actor_critic.actor.std ratio = torch.exp(logp_ - logp) ratio_cliped = torch.clamp( ratio, @@ -103,4 +97,4 @@ def _loss_pi( 'Loss/Loss_pi': loss.mean().item(), }, ) - return loss \ No newline at end of file + return loss diff --git a/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py b/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py index 404776d72..3fceec4f7 100644 --- a/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py +++ b/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py @@ -22,13 +22,14 @@ from omnisafe.adapter.barrier_function_adapter import BarrierFunctionAdapter from omnisafe.algorithms import registry from omnisafe.algorithms.on_policy.base.trpo import TRPO -from omnisafe.utils import distributed -from omnisafe.common.barrier_solver import PendulumSolver from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.common.barrier_solver import PendulumSolver +from omnisafe.utils import distributed + @registry.register class TRPOCBF(TRPO): - + def _init_log(self) -> None: super()._init_log() self._logger.register_key('Metrics/angle', min_and_max=True) @@ 
-51,18 +52,26 @@ def _init_env(self) -> None: ) self.solver = PendulumSolver(device=self._cfgs.train_cfgs.device) self.compensator = BarrierCompensator( - obs_dim = self._env.observation_space.shape[0], - act_dim = self._env.action_space.shape[0], - cfgs = self._cfgs.compensator_cfgs, + obs_dim=self._env.observation_space.shape[0], + act_dim=self._env.action_space.shape[0], + cfgs=self._cfgs.compensator_cfgs, ) self._env.set_solver(solver=self.solver) self._env.set_compensator(compensator=self.compensator) - + def _init(self) -> None: super()._init() - self._buf.add_field(name='approx_compensating_act', shape=self._env.action_space.shape, dtype=torch.float32) - self._buf.add_field(name='compensating_act', shape=self._env.action_space.shape, dtype=torch.float32) - + self._buf.add_field( + name='approx_compensating_act', + shape=self._env.action_space.shape, + dtype=torch.float32, + ) + self._buf.add_field( + name='compensating_act', + shape=self._env.action_space.shape, + dtype=torch.float32, + ) + def _update(self) -> None: """Update actor, critic. @@ -77,8 +86,18 @@ def _update(self) -> None: accepted. 
""" data = self._buf.get() - - obs, act, logp, target_value_r, target_value_c, adv_r, adv_c, approx_compensating_act, compensating_act = ( + + ( + obs, + act, + logp, + target_value_r, + target_value_c, + adv_r, + adv_c, + approx_compensating_act, + compensating_act, + ) = ( data['obs'], data['act'], data['logp'], @@ -91,7 +110,11 @@ def _update(self) -> None: ) self._update_actor(obs, act, logp, adv_r, adv_c) - compensator_loss = self._env.compensator.train(observation=obs, approx_compensating_act=approx_compensating_act, compensating_act=compensating_act) + compensator_loss = self._env.compensator.train( + observation=obs, + approx_compensating_act=approx_compensating_act, + compensating_act=compensating_act, + ) dataloader = DataLoader( dataset=TensorDataset(obs, target_value_r, target_value_c), batch_size=self._cfgs.algo_cfgs.batch_size, diff --git a/omnisafe/algorithms/on_policy/base/ppo.py b/omnisafe/algorithms/on_policy/base/ppo.py index 69f0ce4e9..463b286c8 100644 --- a/omnisafe/algorithms/on_policy/base/ppo.py +++ b/omnisafe/algorithms/on_policy/base/ppo.py @@ -16,8 +16,6 @@ from __future__ import annotations -import torch - from omnisafe.algorithms import registry from omnisafe.algorithms.on_policy.base.policy_gradient import PolicyGradient @@ -31,57 +29,3 @@ class PPO(PolicyGradient): - Authors: John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, Oleg Klimov. - URL: `PPO `_ """ - - def _loss_pi( - self, - obs: torch.Tensor, - act: torch.Tensor, - logp: torch.Tensor, - adv: torch.Tensor, - ) -> torch.Tensor: - r"""Computing pi/actor loss. - - In Proximal Policy Optimization, the loss is defined as: - - .. 
math:: - - L^{CLIP} = \underset{s_t \sim \rho_{\theta}}{\mathbb{E}} \left[ - \min ( r_t A^{R}_{\pi_{\theta}} (s_t, a_t) , \text{clip} (r_t, 1 - \epsilon, 1 + \epsilon) - A^{R}_{\pi_{\theta}} (s_t, a_t) - \right] - - where :math:`r_t = \frac{\pi_{\theta}^{'} (a_t|s_t)}{\pi_{\theta} (a_t|s_t)}`, - :math:`\epsilon` is the clip parameter, and :math:`A^{R}_{\pi_{\theta}} (s_t, a_t)` is the - advantage. - - Args: - obs (torch.Tensor): The ``observation`` sampled from buffer. - act (torch.Tensor): The ``action`` sampled from buffer. - logp (torch.Tensor): The ``log probability`` of action sampled from buffer. - adv (torch.Tensor): The ``advantage`` processed. ``reward_advantage`` here. - - Returns: - The loss of pi/actor. - """ - distribution = self._actor_critic.actor(obs) - logp_ = self._actor_critic.actor.log_prob(act) - std = self._actor_critic.actor.std - ratio = torch.exp(logp_ - logp) - ratio_cliped = torch.clamp( - ratio, - 1 - self._cfgs.algo_cfgs.clip, - 1 + self._cfgs.algo_cfgs.clip, - ) - loss = -torch.min(ratio * adv, ratio_cliped * adv).mean() - loss -= self._cfgs.algo_cfgs.entropy_coef * distribution.entropy().mean() - # useful extra info - entropy = distribution.entropy().mean().item() - self._logger.store( - { - 'Train/Entropy': entropy, - 'Train/PolicyRatio': ratio, - 'Train/PolicyStd': std, - 'Loss/Loss_pi': loss.mean().item(), - }, - ) - return loss diff --git a/omnisafe/common/barrier_comp.py b/omnisafe/common/barrier_comp.py index 57d39a8d6..1a27d5863 100644 --- a/omnisafe/common/barrier_comp.py +++ b/omnisafe/common/barrier_comp.py @@ -17,8 +17,10 @@ import torch from torch import optim -from omnisafe.utils.model import build_mlp_network + from omnisafe.utils.config import Config +from omnisafe.utils.model import build_mlp_network + class BarrierCompensator(torch.nn.Module): """A module that represents a barrier compensator using a multi-layer perceptron (MLP) network. 
@@ -39,9 +41,9 @@ class BarrierCompensator(torch.nn.Module): act_dim (int): Dimension of the action space. cfgs (Config): Configuration parameters for the network and training. """ - - def __init__(self, obs_dim: int, act_dim: int, cfgs: Config): - super(BarrierCompensator, self).__init__() + + def __init__(self, obs_dim: int, act_dim: int, cfgs: Config) -> None: + super().__init__() self._cfgs: Config = cfgs self.model: torch.nn.Module = build_mlp_network( sizes=[obs_dim, *self._cfgs.hidden_sizes, act_dim], @@ -49,7 +51,7 @@ def __init__(self, obs_dim: int, act_dim: int, cfgs: Config): weight_initialization_mode=self._cfgs.weight_initialization_mode, ) self.optimizer: optim.Adam = optim.Adam(self.parameters(), lr=self._cfgs.lr) - + def forward(self, obs: torch.Tensor) -> torch.Tensor: """Estimate the sum of previous compensating actions. @@ -61,7 +63,12 @@ def forward(self, obs: torch.Tensor) -> torch.Tensor: """ return self.model(obs) - def train(self, observation: torch.Tensor, approx_compensating_act: torch.Tensor, compensating_act: torch.Tensor) -> torch.Tensor: + def train( + self, + observation: torch.Tensor, + approx_compensating_act: torch.Tensor, + compensating_act: torch.Tensor, + ) -> torch.Tensor: """Train the barrier compensator model. 
This method updates the model parameters to minimize the difference between the model's output and the @@ -79,8 +86,8 @@ def train(self, observation: torch.Tensor, approx_compensating_act: torch.Tensor for _ in range(self._cfgs.update_iters): target = approx_compensating_act + compensating_act self.optimizer.zero_grad() - loss = torch.pow((self(observation)-target), 2).mean() + loss = torch.pow((self(observation) - target), 2).mean() loss.backward() self.optimizer.step() - + return loss diff --git a/omnisafe/common/barrier_solver.py b/omnisafe/common/barrier_solver.py index 1c11ffffb..b00af906e 100644 --- a/omnisafe/common/barrier_solver.py +++ b/omnisafe/common/barrier_solver.py @@ -15,14 +15,17 @@ """Implementation of the Control Barrier Function Solver.""" from __future__ import annotations + import warnings -warnings.filterwarnings("ignore") + +import joblib import numpy as np import torch -from cvxopt import matrix -from cvxopt import solvers +from cvxopt import matrix, solvers from sklearn.gaussian_process import GaussianProcessRegressor -from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C +from sklearn.gaussian_process.kernels import RBF +from sklearn.gaussian_process.kernels import ConstantKernel as C + class PendulumSolver: """Solver for the pendulum problem using Gaussian Process models. @@ -35,9 +38,14 @@ class PendulumSolver: device (str): Device to run the computations on. """ - def __init__(self, action_size: int = 1, observation_size: int = 3, - torque_bound: float = 15., max_speed: float = 60., - device: str = 'cpu') -> None: + def __init__( + self, + action_size: int = 1, + observation_size: int = 3, + torque_bound: float = 15.0, + max_speed: float = 60.0, + device: str = 'cpu', + ) -> None: """Initializes the PendulumSolver with specified parameters. 
Args: @@ -56,29 +64,45 @@ def __init__(self, action_size: int = 1, observation_size: int = 3, self._gamma_b = 0.5 self._kd = 1.5 self._build_barrier() - self.build_GP_model() - self.GP_model_prev = None + self.build_gp_model() + self.gp_model_prev = None + warnings.filterwarnings('ignore') - def build_GP_model(self) -> None: + def build_gp_model(self, save_dir: str | None = None) -> None: """Builds the Gaussian Process model.""" gp_list = [] noise = 0.01 for _ in range(self.observation_size - 1): - kern = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) - gp = GaussianProcessRegressor(kernel=kern, alpha=noise, n_restarts_optimizer=10) - gp_list.append(gp) - self.GP_model = gp_list + if not save_dir: + kern = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) + gp = GaussianProcessRegressor(kernel=kern, alpha=noise, n_restarts_optimizer=10) + gp_list.append(gp) + else: + gp_list = joblib.load(save_dir) + self.gp_model = gp_list + + @property + def gp_models(self) -> list[GaussianProcessRegressor]: + """Return all gaussian process regressor for saving.""" + return self.gp_model def _build_barrier(self) -> None: """Builds the barrier for the pendulum solver.""" - self.P = matrix(np.diag([1., 1e16]), tc='d') + self.P = matrix(np.diag([1.0, 1e16]), tc='d') self.q = matrix(np.zeros(self.action_size + 1)) self.h1 = np.array([1, 0.01]) self.h2 = np.array([1, -0.01]) self.h3 = np.array([-1, 0.01]) self.h4 = np.array([-1, -0.01]) - def control_barrier(self, original_action: torch.Tensor, f: np.ndarray, g: np.ndarray, x: np.ndarray, std: np.ndarray) -> torch.Tensor: + def control_barrier( + self, + original_action: torch.Tensor, + f: np.ndarray, + g: np.ndarray, + x: np.ndarray, + std: np.ndarray, + ) -> torch.Tensor: """ Adjusts the original action using a control barrier function to ensure that the action complies with the system's physical constraints. 
@@ -97,49 +121,64 @@ def control_barrier(self, original_action: torch.Tensor, f: np.ndarray, g: np.nd # Define gamma for the barrier function gamma_b = 0.5 kd = 1.5 - u_rl = original_action.detach().numpy() - # u_rl*=self.torque_bound + u_rl = original_action.cpu().detach().numpy() # Set up Quadratic Program to satisfy Control Barrier Function G = np.array( [ [ - -np.dot(self.h1, g), - -np.dot(self.h2, g), - -np.dot(self.h3, g), - -np.dot(self.h4, g), + -np.dot(self.h1, g), + -np.dot(self.h2, g), + -np.dot(self.h3, g), + -np.dot(self.h4, g), 1, - -1, - g[1], - -g[1] - ], + -1, + g[1], + -g[1], + ], [ - -1, - -1, - -1, - -1, - 0, - 0, - 0, - 0 - ] - ] + -1, + -1, + -1, + -1, + 0, + 0, + 0, + 0, + ], + ], ) G = np.transpose(G) h = np.array( [ - gamma_b * self.F + np.dot(self.h1, f) + np.dot(self.h1, g) * u_rl - (1 - gamma_b) * np.dot(self.h1, x) - kd * np.abs(np.dot(self.h1, std)), - gamma_b * self.F + np.dot(self.h2, f) + np.dot(self.h2, g) * u_rl - (1 - gamma_b) * np.dot(self.h2, x) - kd * np.abs(np.dot(self.h2, std)), - gamma_b * self.F + np.dot(self.h3, f) + np.dot(self.h3, g) * u_rl - (1 - gamma_b) * np.dot(self.h3, x) - kd * np.abs(np.dot(self.h3, std)), - gamma_b * self.F + np.dot(self.h4, f) + np.dot(self.h4, g) * u_rl - (1 - gamma_b) * np.dot(self.h4, x) - kd * np.abs(np.dot(self.h4, std)), - -u_rl + self.torque_bound, - u_rl + self.torque_bound, - -f[1] - g[1] * u_rl + self.max_speed, - f[1] + g[1] * u_rl + self.max_speed - ] + gamma_b * self.F + + np.dot(self.h1, f) + + np.dot(self.h1, g) * u_rl + - (1 - gamma_b) * np.dot(self.h1, x) + - kd * np.abs(np.dot(self.h1, std)), + gamma_b * self.F + + np.dot(self.h2, f) + + np.dot(self.h2, g) * u_rl + - (1 - gamma_b) * np.dot(self.h2, x) + - kd * np.abs(np.dot(self.h2, std)), + gamma_b * self.F + + np.dot(self.h3, f) + + np.dot(self.h3, g) * u_rl + - (1 - gamma_b) * np.dot(self.h3, x) + - kd * np.abs(np.dot(self.h3, std)), + gamma_b * self.F + + np.dot(self.h4, f) + + np.dot(self.h4, g) * u_rl + - (1 - gamma_b) * 
np.dot(self.h4, x) + - kd * np.abs(np.dot(self.h4, std)), + -u_rl + self.torque_bound, + u_rl + self.torque_bound, + -f[1] - g[1] * u_rl + self.max_speed, + f[1] + g[1] * u_rl + self.max_speed, + ], ) h = np.squeeze(h).astype(np.double) - + # Convert numpy arrays to cvx matrices to set up QP G = matrix(G, tc='d') h = matrix(h, tc='d') @@ -150,10 +189,10 @@ def control_barrier(self, original_action: torch.Tensor, f: np.ndarray, g: np.nd # Check if the adjusted action is within bounds if np.add(np.squeeze(u_rl), np.squeeze(u_bar[0])) - 0.001 >= self.torque_bound: u_bar[0] = self.torque_bound - u_rl - print("Error in QP") + print('Error in QP') elif np.add(np.squeeze(u_rl), np.squeeze(u_bar[0])) + 0.001 <= -self.torque_bound: u_bar[0] = -self.torque_bound - u_rl - print("Error in QP") + print('Error in QP') return torch.as_tensor(u_bar[0], dtype=torch.float32, device=self._device).unsqueeze(dim=0) @@ -173,18 +212,27 @@ def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: dt = 0.05 # Time step G = 10 # Gravitational constant m = 2 # Mass - l = 2 # Length + length = 2 # Length theta = np.arctan2(obs[1], obs[0]) # Calculate the angle theta_dot = obs[2] # Angular velocity # Dynamics equations - f = np.array([-3 * G / (2 * l) * np.sin(theta + np.pi) * dt**2 + theta_dot * dt + theta + 3 / (m * l**2) * original_action * dt**2, - theta_dot - 3 * G / (2 * l) * np.sin(theta + np.pi) * dt + 3 / (m * l**2) * original_action * dt]) + f = np.array( + [ + -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2 + + theta_dot * dt + + theta + + 3 / (m * length**2) * original_action * dt**2, + theta_dot + - 3 * G / (2 * length) * np.sin(theta + np.pi) * dt + + 3 / (m * length**2) * original_action * dt, + ], + ) return np.squeeze(f) - def update_GP_dynamics(self, obs: np.ndarray, act: np.ndarray) -> None: + def update_gp_dynamics(self, obs: np.ndarray, act: np.ndarray) -> None: """ Updates the Gaussian Process (GP) dynamics model based on observed states and 
actions. @@ -192,60 +240,70 @@ def update_GP_dynamics(self, obs: np.ndarray, act: np.ndarray) -> None: obs (np.ndarray): Observed states. act (np.ndarray): Actions taken. """ - obs=obs.detach().cpu().squeeze().numpy() - act=act.detach().cpu().squeeze().numpy() + obs = obs.detach().cpu().squeeze().numpy() + act = act.detach().cpu().squeeze().numpy() N = self.observation_size X = obs U = act L = len(X) - err = np.zeros((L-1, N-1)) - S = np.zeros((L-1, 2)) - for i in range(L-1): + err = np.zeros((L - 1, N - 1)) + S = np.zeros((L - 1, 2)) + for i in range(L - 1): f = self.get_dynamics(X[i], U[i]) theta_p = np.arctan2(X[i][1], X[i][0]) theta_dot_p = X[i][2] - theta = np.arctan2(X[i+1][1], X[i+1][0]) - theta_dot = X[i+1][2] + theta = np.arctan2(X[i + 1][1], X[i + 1][0]) + theta_dot = X[i + 1][2] S[i, :] = np.array([theta_p, theta_dot_p]) err[i, :] = np.array([theta, theta_dot]) - f - self.GP_model[0].fit(S, err[:, 0]) - self.GP_model[1].fit(S, err[:, 1]) + self.gp_model[0].fit(S, err[:, 0]) + self.gp_model[1].fit(S, err[:, 1]) - def get_GP_dynamics(self, obs: torch.Tensor, use_prev_model: bool) -> list[np.ndarray]: + def get_gp_dynamics(self, obs: torch.Tensor, use_prev_model: bool) -> list[np.ndarray]: """ - Retrieves the GP dynamics based on the current observation. + Retrieves the gp dynamics based on the current observation. Args: obs (torch.Tensor): Current state observation. Returns: - list[np.ndarray]: list containing the GP dynamics [f, g, x, std]. + list[np.ndarray]: list containing the gp dynamics [f, g, x, std]. 
""" obs = obs.cpu().detach().numpy() u_rl = 0 dt = 0.05 G = 10 m = 1 - l = 1 + length = 1 obs = np.squeeze(obs) theta = np.arctan2(obs[1], obs[0]) theta_dot = obs[2] - x = np.array([theta, theta_dot]) # 这个x估计就对应state + x = np.array([theta, theta_dot]) # 这个x估计就对应state f_nom = np.array( [ - -3*G/(2*l)*np.sin(theta + np.pi)*dt**2 + theta_dot*dt + theta + 3/(m*l**2)*u_rl*dt**2, - theta_dot - 3*G/(2*l)*np.sin(theta + np.pi)*dt + 3/(m*l**2)*u_rl*dt - ] + -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2 + + theta_dot * dt + + theta + + 3 / (m * length**2) * u_rl * dt**2, + theta_dot + - 3 * G / (2 * length) * np.sin(theta + np.pi) * dt + + 3 / (m * length**2) * u_rl * dt, + ], ) - g = np.array([3/(m*l**2)*dt**2, 3/(m*l**2)*dt]) + g = np.array([3 / (m * length**2) * dt**2, 3 / (m * length**2) * dt]) f_nom = np.squeeze(f_nom) f = np.zeros(2) if use_prev_model: - [m1, std1] = self.GP_model_prev[0].predict(x.reshape(1,-1), return_std=True) - [m2, std2] = self.GP_model_prev[1].predict(x.reshape(1,-1), return_std=True) + [m1, std1] = self.gp_model_prev[0].predict(x.reshape(1, -1), return_std=True) + [m2, std2] = self.gp_model_prev[1].predict(x.reshape(1, -1), return_std=True) else: - [m1, std1] = self.GP_model[0].predict(x.reshape(1, -1), return_std=True) - [m2, std2] = self.GP_model[1].predict(x.reshape(1, -1), return_std=True) + [m1, std1] = self.gp_model[0].predict(x.reshape(1, -1), return_std=True) + [m2, std2] = self.gp_model[1].predict(x.reshape(1, -1), return_std=True) f[0] = f_nom[0] + m1 f[1] = f_nom[1] + m2 - return [np.squeeze(f), np.squeeze(g), np.squeeze(x), np.array([np.squeeze(std1), np.squeeze(std2)])] + return [ + np.squeeze(f), + np.squeeze(g), + np.squeeze(x), + np.array([np.squeeze(std1), np.squeeze(std2)]), + ] diff --git a/omnisafe/common/buffer/vector_onpolicy_buffer.py b/omnisafe/common/buffer/vector_onpolicy_buffer.py index a8e2c25a8..3ebd61c87 100644 --- a/omnisafe/common/buffer/vector_onpolicy_buffer.py +++ 
b/omnisafe/common/buffer/vector_onpolicy_buffer.py @@ -87,7 +87,7 @@ def __init__( # pylint: disable=super-init-not-called,too-many-arguments ) for _ in range(num_envs) ] - + def add_field(self, name: str, shape: tuple[int, ...], dtype: torch.dtype) -> None: """Add a field to the buffer. diff --git a/omnisafe/common/robust_barrier_solver.py b/omnisafe/common/robust_barrier_solver.py index 80d8d33b6..639ae8d3a 100644 --- a/omnisafe/common/robust_barrier_solver.py +++ b/omnisafe/common/robust_barrier_solver.py @@ -1,145 +1,152 @@ +from __future__ import annotations + +from typing import Any + +import gymnasium as gym import numpy as np import torch -from cvxopt import matrix -from cvxopt import solvers -from omnisafe.common.utils import to_tensor, prRed, sort_vertices_cclockwise from qpth.qp import QPFunction -DYNAMICS_MODE = {'Unicycle': {'n_s': 3, 'n_u': 2}, # state = [x y θ] - 'SimulatedCars': {'n_s': 10, 'n_u': 1}, # state = [x y θ v ω] - 'Pvtol': {'n_s': 6, 'n_u': 2}, # state = [x y θ v_x v_y thrust] - 'Pendulum-v1': {'n_s': 3, 'n_u': 1} - } +from omnisafe.common.utils import sort_vertices_cclockwise, to_tensor + + +DYNAMICS_MODE = {'Unicycle': {'n_s': 3, 'n_u': 2}} class CBFQPLayer: - def __init__(self, env, device='cpu', gamma_b=20, k_d=3.0, l_p=0.03): - """Constructor of CBFLayer. - - Parameters - ---------- - env : gym.env - Gym environment. - gamma_b : float, optional - gamma of control barrier certificate. - k_d : float, optional - confidence parameter desired (2.0 corresponds to ~95% for example). + def __init__( + self, + env: gym.Env, + device: str = 'cpu', + gamma_b: float = 20, + k_d: float = 3.0, + l_p: float = 0.03, + ) -> None: + """Initializes a CBFLayer instance with specified parameters and environment. + + Args: + env (gym.Env): The Gym environment to interact with. + device (str, optional): The device type, such as 'cpu' or 'gpu'. Defaults to 'cpu'. + gamma_b (float, optional): The gamma parameter of the control barrier certificate. 
Defaults to 20. + k_d (float, optional): The confidence parameter desired (e.g., 2.0 corresponds to ~95% confidence). Defaults to 3.0. + l_p (float, optional): Some additional layer parameter, purpose unspecified. Defaults to 0.03. """ - self.device = torch.device(device) - self.env = env self.u_min, self.u_max = self.get_control_bounds() self.gamma_b = gamma_b - self.k_d = k_d self.l_p = l_p - self.action_dim = env.action_space.shape[0] - def get_safe_action(self, state_batch, action_batch, mean_pred_batch, sigma_batch, modular=False, cbf_info_batch=None): # TODO: 迁移的核心在于此,把它用CBF的方法来改写就好 + def get_safe_action( + self, + state_batch: torch.Tensor, + action_batch: torch.Tensor, + mean_pred_batch: torch.Tensor, + sigma_batch: torch.Tensor, + ) -> torch.Tensor: + """Computes safe actions based on current state and action predictions, adjusting for uncertainties. + + Args: + state_batch (torch.Tensor): Current state batch, tensor or ndarray. + action_batch (torch.Tensor): Nominal action batch, tensor or ndarray. + mean_pred_batch (torch.Tensor): Mean disturbance predictions, tensor or ndarray. + sigma_batch (torch.Tensor): Standard deviations of disturbances, tensor or ndarray. + cbf_info_batch (torch.Tensor, optional): Additional control barrier function information batch, tensor or ndarray. + + Returns: + torch.Tensor: Safe actions adjusted for given constraints and uncertainties. """ - - Parameters - ---------- - state_batch : torch.tensor or ndarray - action_batch : torch.tensor or ndarray - State batch - mean_pred_batch : torch.tensor or ndarray - Mean of disturbance - sigma_batch : torch.tensor or ndarray - Standard deviation of disturbance - - Returns - ------- - final_action_batch : torch.tensor - Safe actions to take in the environment. 
- """ - - # batch form if only a single data point is passed + # Batch form adjustment if only a single data point is passed expand_dims = len(state_batch.shape) == 1 if expand_dims: - action_batch = action_batch.unsqueeze(0) state_batch = state_batch.unsqueeze(0) + action_batch = action_batch.unsqueeze(0) mean_pred_batch = mean_pred_batch.unsqueeze(0) sigma_batch = sigma_batch.unsqueeze(0) - if cbf_info_batch is not None: - cbf_info_batch = cbf_info_batch.unsqueeze(0) - - if modular: - final_action = torch.clamp(action_batch, self.u_min.repeat(action_batch.shape[0], 1), self.u_max.repeat(action_batch.shape[0], 1)) - else: - Ps, qs, Gs, hs = self.get_cbf_qp_constraints(state_batch, action_batch, mean_pred_batch, sigma_batch, modular=modular, cbf_info_batch=cbf_info_batch) - - Ps, qs, Gs, hs = Ps.detach().cpu().numpy(), qs.detach().cpu().numpy(), Gs.detach().cpu().numpy(), hs.detach().cpu().numpy() - batch_size = Ps.shape[0] - safe_actions = [] - for i in range(batch_size): - Ps_m = matrix(np.diag([1., 1e16]), tc='d') - qs_m = matrix(np.zeros(2)) - Gs_m = matrix(np.float64(Gs[i]), tc='d') - hs_m = matrix(np.float64(hs[i]), tc='d') - solvers.options['show_progress'] = False - sol = solvers.qp(Ps_m, qs_m, Gs_m, hs_m) - safe_action=torch.as_tensor(sol['x'][0], dtype=torch.float32) - safe_actions.append(safe_action) - safe_action_batch = torch.as_tensor(safe_actions, dtype=torch.float32, device=self.device).unsqueeze(-1) - - # print(action_batch.shape, safe_action_batch.shape) - # safe_action_batch = self.solve_qp(Ps, qs, Gs, hs) - final_action = torch.clamp(action_batch + safe_action_batch, self.u_min.repeat(action_batch.shape[0], 1), self.u_max.repeat(action_batch.shape[0], 1)) - - return final_action if not expand_dims else final_action.squeeze(0) - - def solve_qp(self, Ps: torch.Tensor, qs: torch.Tensor, Gs: torch.Tensor, hs: torch.Tensor): - """Solves: + + Ps, qs, Gs, hs = self.get_cbf_qp_constraints( + state_batch, + action_batch, + mean_pred_batch, + sigma_batch, 
+ ) + safe_action_batch = self.solve_qp(Ps, qs, Gs, hs) + final_action_batch = torch.clamp( + action_batch + safe_action_batch, + self.u_min.repeat(action_batch.shape[0], 1), + self.u_max.repeat(action_batch.shape[0], 1), + ) + + return final_action_batch if not expand_dims else final_action_batch.squeeze(0) + + def solve_qp( + self, + Ps: torch.Tensor, + qs: torch.Tensor, + Gs: torch.Tensor, + hs: torch.Tensor, + ) -> torch.Tensor: + """Solves a batch of quadratic programming (QP) problems. + + Each QP problem is defined as: minimize_{u,eps} 0.5 * u^T P u + q^T u - subject to G[u,eps]^T <= h - - Parameters - ---------- - Ps : torch.Tensor - (batch_size, n_u+1, n_u+1) - qs : torch.Tensor - (batch_size, n_u+1) - Gs : torch.Tensor - (batch_size, num_ineq_constraints, n_u+1) - hs : torch.Tensor - (batch_size, num_ineq_constraints) - Returns - ------- - safe_action_batch : torch.tensor - The solution of the qp without the last dimension (the slack). + subject to G[u,eps]^T <= h + + Args: + Ps (torch.Tensor): Quadratic cost matrix for each problem, with shape (batch_size, n_u+1, n_u+1). + qs (torch.Tensor): Linear cost vector for each problem, with shape (batch_size, n_u+1). + Gs (torch.Tensor): Inequality constraint matrix for each problem, with shape (batch_size, num_ineq_constraints, n_u+1). + hs (torch.Tensor): Inequality constraint vector for each problem, with shape (batch_size, num_ineq_constraints). + + Returns: + The safe action for each problem, omitting the slack variable, with dimension (batch_size, n_u). 
""" Ghs = torch.cat((Gs, hs.unsqueeze(2)), -1) Ghs_norm = torch.max(torch.abs(Ghs), dim=2, keepdim=True)[0] Gs /= Ghs_norm hs = hs / Ghs_norm.squeeze(-1) - sol = self.cbf_layer(Ps, qs, Gs, hs, solver_args={"check_Q_spd": False, "maxIter": 100000, "notImprovedLim": 10, "eps": 1e-4}) - safe_action_batch = sol[:, :self.env.action_space.shape[0]] - return safe_action_batch - - def cbf_layer(self, Qs, ps, Gs, hs, As=None, bs=None, solver_args=None): - """ - - Parameters - ---------- - Qs : torch.Tensor - ps : torch.Tensor - Gs : torch.Tensor - shape (batch_size, num_ineq_constraints, num_vars) - hs : torch.Tensor - shape (batch_size, num_ineq_constraints) - As : torch.Tensor, optional - bs : torch.Tensor, optional - solver_args : dict, optional - - Returns - ------- - result : torch.Tensor - Result of QP + sol = self.cbf_layer( + Ps, + qs, + Gs, + hs, + solver_args={ + 'check_Q_spd': False, + 'maxIter': 100000, + 'notImprovedLim': 10, + 'eps': 1e-4, + }, + ) + + return sol[:, : self.env.action_space.shape[0]] + + def cbf_layer( + self, + Qs: torch.Tensor, + ps: torch.Tensor, + Gs: torch.Tensor, + hs: torch.Tensor, + As: torch.Tensor | None = None, + bs: torch.Tensor | None = None, + solver_args: dict[str, Any] | None = None, + ) -> torch.Tensor: + """Applies a custom layer to solve QP problems using given constraints. + + Args: + Qs (torch.Tensor): Quadratic cost matrix for each problem. + ps (torch.Tensor): Linear cost vector for each problem. + Gs (torch.Tensor): Inequality constraint matrix for each problem, shape (batch_size, num_ineq_constraints, num_vars). + hs (torch.Tensor): Inequality constraint vector for each problem, shape (batch_size, num_ineq_constraints). + As (torch.Tensor, optional): Equality constraint matrix. Defaults to None. + bs (torch.Tensor, optional): Equality constraint vector. Defaults to None. + solver_args (dict, optional): Dictionary of solver arguments. Defaults to None. + + Returns: + Result of the QP solver for each problem. 
""" if solver_args is None: @@ -149,57 +156,54 @@ def cbf_layer(self, Qs, ps, Gs, hs, As=None, bs=None, solver_args=None): As = torch.Tensor().to(self.device).double() bs = torch.Tensor().to(self.device).double() - result = QPFunction(verbose=-1, **solver_args)(Qs.double(), ps.double(), Gs.double(), hs.double(), As, bs).float() - if torch.any(torch.isnan(result)): - prRed('QP Failed to solve - result is nan == {}!'.format(torch.any(torch.isnan(result)))) - raise Exception('QP Failed to solve') - return result - - def get_cbf_qp_constraints(self, state_batch, action_batch, mean_pred_batch, sigma_pred_batch, modular=False, cbf_info_batch=None): # TODO: 解耦合的核心在这里 - """Build up matrices required to solve qp - - Program specifically solves: + return QPFunction(verbose=-1, **solver_args)( + Qs.double(), + ps.double(), + Gs.double(), + hs.double(), + As, + bs, + ).float() + + def get_cbf_qp_constraints( + self, + state_batch: torch.Tensor, + action_batch: torch.Tensor, + mean_pred_batch: torch.Tensor, + sigma_pred_batch: torch.Tensor, + gamma_b: float = 1.0, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Builds up matrices required to solve a quadratic program (QP). + + The QP is defined to solve: minimize_{u,eps} 0.5 * u^T P u + q^T u - subject to G[u,eps]^T <= h - - Each control barrier certificate is of the form: - dh/dx^T (f_out + g_out u) >= -gamma^b h_out^3 where out here is an output of the state. - - In the case of SafetyGym_point dynamics: - state = [x y θ v ω] - state_d = [v*cos(θ) v*sin(θ) omega ω u^v u^ω] - - Quick Note on batch matrix multiplication for matrices A and B: - - Batch size should be first dim - - Everything needs to be 3-dimensional - - E.g. if B is a vec, i.e. shape (batch_size, vec_length) --> .view(batch_size, vec_length, 1) - - Parameters - ---------- - state_batch : torch.tensor - current state (check dynamics.py for details on each dynamics' specifics) - action_batch : torch.tensor - Nominal control input. 
- mean_pred_batch : torch.tensor - mean disturbance prediction state, dimensions (n_s, n_u) - sigma_pred_batch : torch.tensor - standard deviation in additive disturbance after undergoing the output dynamics. - gamma_b : float, optional - CBF parameter for the class-Kappa function - - Returns - ------- - P : torch.tensor - Quadratic cost matrix in qp (minimize_{u,eps} 0.5 * u^T P u + q^T u) - q : torch.tensor - Linear cost vector in qp (minimize_{u,eps} 0.5 * u^T P u + q^T u) - G : torch.tensor - Inequality constraint matrix (G[u,eps] <= h) of size (num_constraints, n_u + 1) - h : torch.tensor - Inequality constraint vector (G[u,eps] <= h) of size (num_constraints,) + subject to G[u,eps]^T <= h + + Args: + state_batch (torch.Tensor): Current state batch. Refer to `dynamics.py` for specifics on each dynamic. + action_batch (torch.Tensor): Nominal control input batch. + mean_pred_batch (torch.Tensor): Mean disturbance prediction state batch, dimensions (n_s, n_u). + sigma_pred_batch (torch.Tensor): Standard deviation of the additive disturbance after undergoing the output dynamics. + gamma_b (float, optional): CBF parameter for the class-Kappa function. Defaults to 1.0. + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: A tuple containing: + P (torch.Tensor): Quadratic cost matrix in the QP. + q (torch.Tensor): Linear cost vector in the QP. + G (torch.Tensor): Inequality constraint matrix for QP constraints. + h (torch.Tensor): Inequality constraint vector for QP constraints. 
""" - - assert len(state_batch.shape) == 2 and len(action_batch.shape) == 2 and len(mean_pred_batch.shape) == 2 and len(sigma_pred_batch.shape) == 2, print(state_batch.shape, action_batch.shape, mean_pred_batch.shape, sigma_pred_batch.shape) + assert ( + len(state_batch.shape) == 2 + and len(action_batch.shape) == 2 + and len(mean_pred_batch.shape) == 2 + and len(sigma_pred_batch.shape) == 2 + ), print( + state_batch.shape, + action_batch.shape, + mean_pred_batch.shape, + sigma_pred_batch.shape, + ) batch_size = state_batch.shape[0] gamma_b = self.gamma_b @@ -209,76 +213,7 @@ def get_cbf_qp_constraints(self, state_batch, action_batch, mean_pred_batch, sig action_batch = torch.unsqueeze(action_batch, -1).to(self.device) mean_pred_batch = torch.unsqueeze(mean_pred_batch, -1).to(self.device) sigma_pred_batch = torch.unsqueeze(sigma_pred_batch, -1).to(self.device) - - if self.env.dynamics_mode == 'Pendulum': - num_constraints = 8 - n_u = action_batch.shape[1] # dimension of control inputs - # Inequality constraints (G[u, eps] <= h) - G = torch.zeros((batch_size, num_constraints, n_u + 1)).to(self.device) # the extra variable is for epsilon (to make sure qp is always feasible) - h = torch.zeros((batch_size, num_constraints)).to(self.device) - - h1 = torch.FloatTensor([1, 0.01]).unsqueeze(-1).to(self.device) - h2 = torch.FloatTensor([1, -0.01]).unsqueeze(-1).to(self.device) - h3 = torch.FloatTensor([-1, 0.01]).unsqueeze(-1).to(self.device) - h4 = torch.FloatTensor([-1, -0.01]).unsqueeze(-1).to(self.device) - action_batch_scaled=(action_batch*15.0).squeeze(-1).to(self.device) # TODO: 写的好看点 - - theta = state_batch[:,0,:].squeeze(-1) - theta_dot = state_batch[:,1,:].squeeze(-1) - f_norm = torch.zeros(batch_size, 2).to(self.device) - # theta [batch_size, 1] - f_norm[:, 0] = -3*10/2*torch.sin(theta+torch.pi)*self.env.dt + theta - f_norm[: ,1] = theta_dot - 3*10/2*torch.sin(theta+torch.pi) - - g = torch.tensor([3*self.env.dt**2, 3*self.env.dt]).unsqueeze(0).to(self.device) - - 
f = torch.zeros_like(f_norm).to(self.device) - f[:, 0] = f_norm[:, 0] + mean_pred_batch[:,0,:].squeeze(-1) - f[:, 1] = f_norm[:, 1] + mean_pred_batch[:,1,:].squeeze(-1) - G = torch.tensor( - [ - [ - -torch.matmul(g, h1), - -torch.matmul(g, h2), - -torch.matmul(g, h3), - -torch.matmul(g, h4), - 1, - -1, - g[:, 1], - -g[:, 1] - ], - [ - -1, - -1, - -1, - -1, - 0, - 0, - 0, - 0 - ] - ] - ).transpose(0, 1).repeat(batch_size, 1, 1).to(self.device) - state_batch_squeeze = state_batch.squeeze(-1) - sigma_pred_batch_squeeze = sigma_pred_batch.squeeze(-1) - - h = torch.cat( - [ - self.gamma_b + torch.matmul(f, h1) + torch.matmul(g, h1) * action_batch_scaled - (1 - self.gamma_b) * torch.matmul(state_batch_squeeze, h1) - self.k_d * torch.abs(torch.matmul(sigma_pred_batch_squeeze, h1)), - self.gamma_b + torch.matmul(f, h2) + torch.matmul(g, h2) * action_batch_scaled - (1 - self.gamma_b) * torch.matmul(state_batch_squeeze, h2) - self.k_d * torch.abs(torch.matmul(sigma_pred_batch_squeeze, h2)), - self.gamma_b + torch.matmul(f, h3) + torch.matmul(g, h3) * action_batch_scaled - (1 - self.gamma_b) * torch.matmul(state_batch_squeeze, h3) - self.k_d * torch.abs(torch.matmul(sigma_pred_batch_squeeze, h3)), - self.gamma_b + torch.matmul(f, h4) + torch.matmul(g, h4) * action_batch_scaled - (1 - self.gamma_b) * torch.matmul(state_batch_squeeze, h4) - self.k_d * torch.abs(torch.matmul(sigma_pred_batch_squeeze, h4)), - -action_batch_scaled + 15.0, - action_batch_scaled + 15.0, - -f[:, 1].unsqueeze(-1) - g[:, 1] * action_batch_scaled + 60.0, - f[:, 1].unsqueeze(-1) + g[:, 1] * action_batch_scaled + 60.0 - ], - dim=1 - ).to(self.device) - P = torch.diag(torch.tensor([1.e0, 1e16])).repeat(batch_size, 1, 1).to(self.device) - q = torch.zeros((batch_size, self.action_dim + 1)).to(self.device) - - elif self.env.dynamics_mode == 'Unicycle': + if self.env.dynamics_mode == 'Unicycle': num_cbfs = len(self.env.hazards) l_p = self.l_p @@ -287,18 +222,10 @@ def get_cbf_qp_constraints(self, state_batch, 
action_batch, mean_pred_batch, sig thetas = state_batch[:, 2, :].squeeze(-1) c_thetas = torch.cos(thetas) s_thetas = torch.sin(thetas) - - # p(x): lookahead output (batch_size, 2) ps = torch.zeros((batch_size, 2)).to(self.device) ps[:, 0] = state_batch[:, 0, :].squeeze(-1) + l_p * c_thetas ps[:, 1] = state_batch[:, 1, :].squeeze(-1) + l_p * s_thetas - - # p_dot(x) = f_p(x) + g_p(x)u + D_p where f_p(x) = 0, g_p(x) = RL and D_p is the disturbance - - # f_p(x) = [0,...,0]^T f_ps = torch.zeros((batch_size, 2, 1)).to(self.device) - - # g_p(x) = RL where L = diag([1, l_p]) Rs = torch.zeros((batch_size, 2, 2)).to(self.device) Rs[:, 0, 0] = c_thetas Rs[:, 0, 1] = -s_thetas @@ -307,9 +234,7 @@ def get_cbf_qp_constraints(self, state_batch, action_batch, mean_pred_batch, sig Ls = torch.zeros((batch_size, 2, 2)).to(self.device) Ls[:, 0, 0] = 1 Ls[:, 1, 1] = l_p - g_ps = torch.bmm(Rs, Ls) # (batch_size, 2, 2) - - # D_p(x) = g_p [0 D_θ]^T + [D_x1 D_x2]^T + g_ps = torch.bmm(Rs, Ls) mu_theta_aug = torch.zeros([batch_size, 2, 1]).to(self.device) mu_theta_aug[:, 1, :] = mean_pred_batch[:, 2, :] mu_ps = torch.bmm(g_ps, mu_theta_aug) + mean_pred_batch[:, :2, :] @@ -318,42 +243,45 @@ def get_cbf_qp_constraints(self, state_batch, action_batch, mean_pred_batch, sig sigma_ps = torch.bmm(torch.abs(g_ps), sigma_theta_aug) + sigma_pred_batch[:, :2, :] # Build RCBFs - hs = 1e3 * torch.ones((batch_size, num_cbfs), device=self.device) # the RCBF itself + hs = 1e3 * torch.ones((batch_size, num_cbfs), device=self.device) dhdps = torch.zeros((batch_size, num_cbfs, 2), device=self.device) hazards = self.env.hazards for i in range(len(hazards)): - if hazards[i]['type'] == 'circle': # 1/2 * (||ps - x_obs||^2 - r^2) + if hazards[i]['type'] == 'circle': obs_loc = to_tensor(hazards[i]['location'], torch.FloatTensor, self.device) - hs[:, i] = 0.5 * (torch.sum((ps - obs_loc)**2, dim=1) - (hazards[i]['radius'] + buffer)**2) - dhdps[:, i, :] = (ps - obs_loc) - elif hazards[i]['type'] == 'polygon': # 
max_j(h_j) where h_j = 1/2 * (dist2seg_j)^2 - vertices = sort_vertices_cclockwise(hazards[i]['vertices']) # (n_v, 2) - segments = np.diff(vertices, axis=0, - append=vertices[[0]]) # (n_v, 2) at row i contains vector from v_i to v_i+1 + hs[:, i] = 0.5 * ( + torch.sum((ps - obs_loc) ** 2, dim=1) - (hazards[i]['radius'] + buffer) ** 2 + ) + dhdps[:, i, :] = ps - obs_loc + elif hazards[i]['type'] == 'polygon': + vertices = sort_vertices_cclockwise(hazards[i]['vertices']) + segments = np.diff(vertices, axis=0, append=vertices[[0]]) segments = to_tensor(segments, torch.FloatTensor, self.device) vertices = to_tensor(vertices, torch.FloatTensor, self.device) - # Get max RBCF TODO: Can be optimized for j in range(segments.shape[0]): - # Compute Distances to segment - dot_products = torch.matmul(ps - vertices[j:j + 1], segments[j]) / torch.sum( - segments[j] ** 2) # (batch_size,) - mask0_ = dot_products < 0 # if <0 closest point on segment is vertex j - mask1_ = dot_products > 1 # if >0 closest point on segment is vertex j+1 - mask_ = torch.logical_and(dot_products >= 0, - dot_products <= 1) # Else find distance to line l_{v_j, v_j+1} - # Compute Distances - dists2seg = torch.zeros((batch_size)) + dot_products = torch.matmul( + ps - vertices[j : j + 1], + segments[j], + ) / torch.sum(segments[j] ** 2) + mask0_ = dot_products < 0 + mask1_ = dot_products > 1 + mask_ = torch.logical_and(dot_products >= 0, dot_products <= 1) + dists2seg = torch.zeros(batch_size) if mask0_.sum() > 0: dists2seg[mask0_] = torch.linalg.norm(ps[mask0_] - vertices[[j]], dim=1) if mask1_.sum() > 0: - dists2seg[mask1_] = torch.linalg.norm(ps[mask1_] - vertices[[(j + 1) % segments.shape[0]]], dim=1) + dists2seg[mask1_] = torch.linalg.norm( + ps[mask1_] - vertices[[(j + 1) % segments.shape[0]]], + dim=1, + ) if mask_.sum() > 0: dists2seg[mask_] = torch.linalg.norm( - dot_products[mask_, None] * segments[j].tile((torch.sum(mask_), 1)) + vertices[[j]] - - ps[mask_], dim=1) - # Compute hs_ for this segment - 
hs_ = 0.5 * ((dists2seg ** 2) + 0.5*buffer) # (batch_size,) - # Compute dhdps TODO: Can be optimized to only compute for indices that need updating + dot_products[mask_, None] * segments[j].tile((torch.sum(mask_), 1)) + + vertices[[j]] + - ps[mask_], + dim=1, + ) + hs_ = 0.5 * ((dists2seg**2) + 0.5 * buffer) dhdps_ = torch.zeros((batch_size, 2)) if mask0_.sum() > 0: dhdps_[mask0_] = ps[mask0_] - vertices[[j]] @@ -362,8 +290,9 @@ def get_cbf_qp_constraints(self, state_batch, action_batch, mean_pred_batch, sig if mask_.sum() > 0: normal_vec = torch.tensor([segments[j][1], -segments[j][0]]) normal_vec /= torch.linalg.norm(normal_vec) - dhdps_[mask_] = (ps[mask_]-vertices[j]).matmul(normal_vec) * normal_vec.view((1,2)).repeat(torch.sum(mask_), 1) # dot products (batch_size, 1) - # Find indices to update (closest segment basically, worst case -> CBF boolean and is a min) + dhdps_[mask_] = (ps[mask_] - vertices[j]).matmul( + normal_vec, + ) * normal_vec.view((1, 2)).repeat(torch.sum(mask_), 1) idxs_to_update = torch.nonzero(hs[:, i] - hs_ > 0) # Update the actual hs to be used in the constraints if idxs_to_update.shape[0] > 0: @@ -371,38 +300,43 @@ def get_cbf_qp_constraints(self, state_batch, action_batch, mean_pred_batch, sig # Compute dhdhps for those indices dhdps[idxs_to_update, i, :] = dhdps_[idxs_to_update, :] else: - raise Exception('Only obstacles of type `circle` or `polygon` are supported, got: {}'.format(hazards[i]['type'])) + raise Exception( + 'Only obstacles of type `circle` or `polygon` are supported, got: {}'.format( + hazards[i]['type'], + ), + ) - n_u = action_batch.shape[1] # dimension of control inputs - num_constraints = num_cbfs + 2 * n_u # each cbf is a constraint, and we need to add actuator constraints (n_u of them) + n_u = action_batch.shape[1] + num_constraints = num_cbfs + 2 * n_u - # Inequality constraints (G[u, eps] <= h) - G = torch.zeros((batch_size, num_constraints, n_u + 1)).to(self.device) # the extra variable is for epsilon (to make 
sure qp is always feasible) + G = torch.zeros((batch_size, num_constraints, n_u + 1)).to(self.device) h = torch.zeros((batch_size, num_constraints)).to(self.device) ineq_constraint_counter = 0 - # Add inequality constraints - G[:, :num_cbfs, :n_u] = -torch.bmm(dhdps, g_ps) # h1^Tg(x) - G[:, :num_cbfs, n_u] = -1 # for slack - h[:, :num_cbfs] = gamma_b * (hs ** 3) + (torch.bmm(dhdps, f_ps + mu_ps) - torch.bmm(torch.abs(dhdps), sigma_ps) + torch.bmm(torch.bmm(dhdps, g_ps), action_batch)).squeeze(-1) + G[:, :num_cbfs, :n_u] = -torch.bmm(dhdps, g_ps) + G[:, :num_cbfs, n_u] = -1 + h[:, :num_cbfs] = gamma_b * (hs**3) + ( + torch.bmm(dhdps, f_ps + mu_ps) + - torch.bmm(torch.abs(dhdps), sigma_ps) + + torch.bmm(torch.bmm(dhdps, g_ps), action_batch) + ).squeeze(-1) ineq_constraint_counter += num_cbfs - - # Let's also build the cost matrices, vectors to minimize control effort and penalize slack - P = torch.diag(torch.tensor([1.e0, 1.e-2, 1e5])).repeat(batch_size, 1, 1).to(self.device) + P = ( + torch.diag(torch.tensor([1.0e0, 1.0e-2, 1e5])) + .repeat(batch_size, 1, 1) + .to(self.device) + ) q = torch.zeros((batch_size, n_u + 1)).to(self.device) - # Add Actuator Constraints - n_u = action_batch.shape[1] # dimension of control inputs + n_u = action_batch.shape[1] for c in range(n_u): - # u_max >= u_nom + u ---> u <= u_max - u_nom if self.u_max is not None: G[:, ineq_constraint_counter, c] = 1 h[:, ineq_constraint_counter] = self.u_max[c] - action_batch[:, c].squeeze(-1) ineq_constraint_counter += 1 - # u_min <= u_nom + u ---> -u <= u_min - u_nom if self.u_min is not None: G[:, ineq_constraint_counter, c] = -1 h[:, ineq_constraint_counter] = -self.u_min[c] + action_batch[:, c].squeeze(-1) @@ -410,19 +344,14 @@ def get_cbf_qp_constraints(self, state_batch, action_batch, mean_pred_batch, sig return P, q, G, h - def get_control_bounds(self): + def get_control_bounds(self) -> tuple[torch.Tensor, torch.Tensor]: """ - Returns - ------- - u_min : torch.tensor - min control input. 
- u_max : torch.tensor - max control input. + Returns: + Action bounds, i.e., min control input and max control input. """ u_min = torch.tensor(self.env.safe_action_space.low).to(self.device) u_max = torch.tensor(self.env.safe_action_space.high).to(self.device) return u_min, u_max - \ No newline at end of file diff --git a/omnisafe/common/robust_gp_model.py b/omnisafe/common/robust_gp_model.py index 2824faf12..3380d1f2d 100644 --- a/omnisafe/common/robust_gp_model.py +++ b/omnisafe/common/robust_gp_model.py @@ -1,59 +1,110 @@ -""" Adapted almost directly from: -https://docs.gpytorch.ai/en/stable/examples/02_Scalable_Exact_GPs/Simple_GP_Regression_CUDA.html +from __future__ import annotations -Training is performed rapidly (and exactly) using GPUs and prediction is done very rapidly using LOVE. -""" +import os +import warnings +from typing import Callable -import torch -import numpy as np import gpytorch -import warnings -warnings.filterwarnings('ignore') -from omnisafe.common.utils import to_tensor, to_numpy +import gymnasium as gym +import numpy as np +import torch +from gpytorch.distributions import MultivariateNormal +from gpytorch.kernels import RBFKernel, ScaleKernel +from gpytorch.likelihoods import Likelihood +from gpytorch.means import ZeroMean +from gpytorch.priors import NormalPrior + +from omnisafe.common.utils import to_numpy, to_tensor +from omnisafe.typing import DEVICE_CPU + -DYNAMICS_MODE = {'Unicycle': {'n_s': 3, 'n_u': 2}, # state = [x y θ] - 'SimulatedCars': {'n_s': 10, 'n_u': 1}, # state = [x y θ v ω] - 'Pvtol': {'n_s': 6, 'n_u': 2}, # state = [x y θ v_x v_y thrust] - 'Pendulum': {'n_s': 2, 'n_u': 1} - } -MAX_STD = {'Unicycle': [2e-1, 2e-1, 2e-1], 'SimulatedCars': [0, 0.2, 0, 0.2, 0, 0.2, 0, 0.2, 0, 0.2], 'Pvtol': [0, 0, 0, 0, 0, 0], 'Pendulum': [0.1, 0.1, 0.1]} +DYNAMICS_MODE = {'Unicycle': {'n_s': 3, 'n_u': 2}} +MAX_STD = {'Unicycle': [2e-1, 2e-1, 2e-1]} class BaseGPy(gpytorch.models.ExactGP): + """ + A Gaussian Process (GP) model using a zero 
mean function and a scaled RBF kernel with priors. + + This class extends gpytorch.models.ExactGP, specifically designed for use in + disturbance estimation tasks. - def __init__(self, train_x, train_y, prior_std, likelihood): + Attributes: + mean_module (ZeroMean): The mean module which is set to zero mean. + covar_module (ScaleKernel): The covariance kernel, a scaled RBF kernel with specified priors. + + Args: + train_x (Tensor): Training input features, which should be a tensor. + train_y (Tensor): Training target values, which should be a tensor. + prior_std (float): The prior standard deviation used to adjust the output scale of the kernel. + likelihood (Likelihood): The likelihood function associated with the GP model. + """ + + def __init__( + self, + train_x: torch.Tensor, + train_y: torch.Tensor, + prior_std: float, + likelihood: Likelihood, + ) -> None: + """Initialize the BaseGPy model.""" super().__init__(train_x, train_y, likelihood) - self.mean_module = gpytorch.means.ZeroMean() - self.covar_module = gpytorch.kernels.ScaleKernel( - gpytorch.kernels.RBFKernel(lengthscale_prior=gpytorch.priors.NormalPrior(1e5, 1e-5)), - outputscale_prior=gpytorch.priors.NormalPrior(prior_std + 1e-6, 1e-5)) - # Initialize lengthscale and outputscale to mean of priors + self.mean_module = ZeroMean() + self.covar_module = ScaleKernel( + RBFKernel(lengthscale_prior=NormalPrior(1e5, 1e-5)), + outputscale_prior=NormalPrior(prior_std + 1e-6, 1e-5), + ) self.covar_module.base_kernel.lengthscale = 1e5 self.covar_module.outputscale = prior_std + 1e-6 - def forward(self, x): + def forward(self, x: torch.Tensor) -> MultivariateNormal: + """Forward pass through the GP model to produce a multivariate normal distribution. + + Args: + x (Tensor): Input features for which predictions are to be made. + + Returns: + MultivariateNormal: A multivariate normal distribution reflecting the GP predictions. 
+ """ mean = self.mean_module(x) covar = self.covar_module(x) - return gpytorch.distributions.MultivariateNormal(mean, covar) + return MultivariateNormal(mean, covar) + class GPyDisturbanceEstimator: - """ - A wrapper around teh BaseGPy model above. + """A class for estimating disturbances using Gaussian Processes with GPyTorch. + + Attributes: + device (torch.device): The device (CPU or CUDA) on which the tensors will be processed. + _train_x (torch.Tensor): Training data features. + _train_y (torch.Tensor): Training data targets. + likelihood (gpytorch.likelihoods.Likelihood): The likelihood model for GP inference. + model (BaseGPy): The GPyTorch model. + + Args: + train_x (torch.Tensor): Training data features. If not a tensor, it will be converted. + train_y (torch.Tensor): Training data targets. If not a tensor, it will be converted. + prior_std (float): Standard deviation of the prior distribution. + likelihood (Optional[gpytorch.likelihoods.Likelihood]): A GPyTorch likelihood. If None, a default GaussianLikelihood is used. + device (Optional[torch.device]): The torch device. Defaults to CPU if None. 
""" - def __init__(self, train_x, train_y, prior_std, likelihood=None, device=None): - - if device: - self.device = device - else: - self.device = torch.device("cpu") + def __init__( + self, + train_x: torch.Tensor, + train_y: torch.Tensor, + prior_std: float, + likelihood: gpytorch.likelihoods.Likelihood | None = None, + device: torch.device = DEVICE_CPU, + ) -> None: + self.device = device if device else torch.device('cpu') if not torch.is_tensor(train_x): - train_x = to_tensor(train_x, torch.FloatTensor, self.device) + train_x = torch.tensor(train_x, dtype=torch.float32, device=self.device) if not torch.is_tensor(train_y): - train_y = to_tensor(train_y, torch.FloatTensor, self.device) - self.train_x = train_x - self.train_y = train_y + train_y = torch.tensor(train_y, dtype=torch.float32, device=self.device) + self._train_x = train_x + self._train_y = train_y if not likelihood: likelihood = gpytorch.likelihoods.GaussianLikelihood() @@ -61,182 +112,143 @@ def __init__(self, train_x, train_y, prior_std, likelihood=None, device=None): self.model = BaseGPy(train_x, train_y, prior_std, likelihood) self.model = self.model.to(self.device) + warnings.filterwarnings('ignore') - def train(self, training_iter, verbose=False): + def train(self, training_iter: int, verbose: bool = False) -> None: + """Trains the Gaussian Process model. - # Find optimal model hyperparameters + Args: + training_iter (int): Number of training iterations. + verbose (bool): If True, prints detailed logging information. 
+ """ self.model.train() self.likelihood.train() - - # Use the adam optimizer - optimizer = torch.optim.Adam(self.model.parameters(), lr=0.1) # Includes GaussianLikelihood parameters - - # "Loss" for GPs - the marginal log likelihood + optimizer = torch.optim.Adam(self.model.parameters(), lr=0.1) mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model) for i in range(training_iter): - # Zero gradients from previous iteration optimizer.zero_grad() - # Output from model - output = self.model(self.train_x) - # Calc loss and backprop gradients - loss = -mll(output, self.train_y) + output = self.model(self._train_x) + loss = -mll(output, self._train_y) loss.backward() if verbose: - print('\tIter %d/%d - Loss: %.3f lengthscale: %.3f noise: %.3f' % ( - i + 1, training_iter, loss.item(), - self.model.covar_module.base_kernel.lengthscale.item(), - self.model.likelihood.noise.item() - )) + print( + f'\tIter {i + 1}/{training_iter} - Loss: {loss.item():.3f} lengthscale: ' + f'{self.model.covar_module.base_kernel.lengthscale.item():.3f} noise: ' + f'{self.likelihood.noise.item():.3f}', + ) optimizer.step() - def predict(self, test_x): + def predict(self, test_x: torch.Tensor) -> dict[str, torch.Tensor | np.ndarray]: + """ + Makes predictions on new data. + + Args: + test_x (torch.Tensor): Test data features. If not a tensor, it will be converted. - # Convert to torch tensor + Returns: + A dictionary containing prediction mean, variance, covariance matrix, and confidence + intervals. If the input was not a tensor, values will be converted to numpy arrays. 
+ """ is_tensor = torch.is_tensor(test_x) if not is_tensor: - test_x = to_tensor(test_x, torch.FloatTensor, self.device) + test_x = torch.tensor(test_x, dtype=torch.float32, device=self.device) - # Get into evaluation (predictive posterior) mode self.model.eval() self.likelihood.eval() - # Test points are regularly spaced along [0,1] - # Make predictions by feeding model through likelihood with torch.no_grad(), gpytorch.settings.fast_pred_var(): observed_pred = self.likelihood(self.model(test_x)) - pred_dict = dict() - pred_dict['mean'] = observed_pred.mean.cpu() - pred_dict['f_var'] = observed_pred.variance.cpu() - pred_dict['f_covar'] = observed_pred.covariance_matrix.cpu() - lower_ci, upper_ci = observed_pred.confidence_region() - pred_dict['lower_ci'] = lower_ci.cpu() - pred_dict['upper_ci'] = upper_ci.cpu() - - # If they gave us ndarray, we give back ndarray + pred_dict = { + 'mean': observed_pred.mean.cpu(), + 'f_var': observed_pred.variance.cpu(), + 'f_covar': observed_pred.covariance_matrix.cpu(), + 'lower_ci': observed_pred.confidence_region()[0].cpu(), + 'upper_ci': observed_pred.confidence_region()[1].cpu(), + } + if not is_tensor: for key, val in pred_dict.items(): - pred_dict[key] = to_numpy(val) + pred_dict[key] = val.numpy() return pred_dict -class DynamicsModel: - def __init__(self, env, gp_model_size=2000, l_p=0.03, device='cpu'): - """Constructor of DynamicsModel. +class DynamicsModel: + """Initializes the DynamicsModel with a gym environment. - Parameters - ---------- - env : gym.env - Gym environment. - """ + Args: + env (gym.Env): The gym environment to model dynamics for. + gp_model_size (int, optional): Maximum history count for disturbances. Defaults to 2000. + l_p (float, optional): Learning parameter. Defaults to 0.03. + device (str, optional): The device to perform computations on. Defaults to 'cpu'. 
+ """ + def __init__( + self, + env: gym.Env, + gp_model_size: int = 2000, + l_p: float = 0.03, + device: str = 'cpu', + ) -> None: self.env = env - # Get Dynamics self.get_f, self.get_g = self.get_dynamics() self.n_s = DYNAMICS_MODE[self.env.dynamics_mode]['n_s'] self.n_u = DYNAMICS_MODE[self.env.dynamics_mode]['n_u'] - # Keep Disturbance History to estimate it using GPs - self.disturb_estimators = None - self.disturbance_history = dict() - self.history_counter = 0 # keeping only max_history_count points in the buffer - self.max_history_count = gp_model_size # How many points we want to have in the GP + self._disturb_estimators = None + self.disturbance_history = {} + self.history_counter = 0 + self.max_history_count = gp_model_size self.disturbance_history['state'] = np.zeros((self.max_history_count, self.n_s)) self.disturbance_history['disturbance'] = np.zeros((self.max_history_count, self.n_s)) - self.train_x = None # x-data used to fit the last GP models - self.train_y = None # y-data used to fit the last GP models + self._train_x = None + self._train_y = None self.l_p = l_p - self.device = torch.device(device) - def predict_next_state(self, state_batch, u_batch, t_batch=None, use_gps=True): - """Given the current state and action, this function predicts the next state. - - Parameters - ---------- - state_batch : ndarray - State - u_batch : ndarray - Action - t_batch: ndarray, optional - Time batch for state dependant dynamics - use_gps : bool, optional - Use GPs to return mean and var - - Returns - ------- - next_state : ndarray - Next state + def predict_next_state(self, state_batch: np.ndarray, u_batch: np.ndarray) -> np.ndarray: """ + Predicts the next state given the current state and action batch. + Args: + state_batch (np.ndarray): The batch of current states. + u_batch (np.ndarray): The batch of actions applied. + + Returns: + np.ndarray: The batch of predicted next states. 
+ """ expand_dims = len(state_batch.shape) == 1 if expand_dims: state_batch = np.expand_dims(state_batch, axis=0) - # Start with our prior for continuous time system x' = f(x) + g(x)u - if t_batch is not None: - next_state_batch = state_batch + self.env.dt * (self.get_f(state_batch, t_batch) + (self.get_g(state_batch, t_batch) @ np.expand_dims(u_batch, -1)).squeeze(-1)) - else: - next_state_batch = state_batch + self.env.dt * (self.get_f(state_batch) + (self.get_g(state_batch) @ np.expand_dims(u_batch, -1)).squeeze(-1)) - - if use_gps: # if we want estimate the disturbance, let's do it! - pred_mean, pred_std = self.predict_disturbance(state_batch) - next_state_batch += self.env.dt * pred_mean - else: - pred_std = np.zeros(state_batch.shape) + next_state_batch = state_batch + self.env.dt * ( + self.get_f(state_batch) + + (self.get_g(state_batch) @ np.expand_dims(u_batch, -1)).squeeze(-1) + ) + pred_mean, pred_std = self.predict_disturbance(state_batch) + next_state_batch += self.env.dt * pred_mean if expand_dims: next_state_batch = next_state_batch.squeeze(0) if pred_std is not None: pred_std = pred_std.squeeze(0) - if t_batch is not None: - next_t_batch = t_batch + self.env.dt - return next_state_batch, self.env.dt * pred_std, next_t_batch - - return next_state_batch, self.env.dt * pred_std, t_batch - - def predict_next_obs(self, state, u): - """Predicts the next observation given the state and u. Note that this only predicts the mean next observation. + return next_state_batch - Parameters - ---------- - state : ndarray - u : ndarray + def get_dynamics(self) -> tuple[Callable, Callable]: + """Retrieves the dynamics functions for drift and control based on the environment's dynamics mode. - Returns - ------- - next_obs : ndarray - Next observation + Returns: + tuple: A tuple containing two callables, `get_f` and `get_g`, which compute the drift and control dynamics respectively. 
""" - - next_state, _, _ = self.predict_next_state(state, u) - next_obs = self.get_obs(next_state) - return next_obs - - def get_dynamics(self): - """Get affine CBFs for a given environment. - - Parameters - ---------- - - Returns - ------- - get_f : callable - Drift dynamics of the continuous system x' = f(x) + g(x)u - get_g : callable - Control dynamics of the continuous system x' = f(x) + g(x)u - """ - if self.env.dynamics_mode == 'Unicycle': - def get_f(state_batch, t_batch=None): - f_x = np.zeros(state_batch.shape) - return f_x + def get_f(state_batch: np.ndarray) -> np.ndarray: + return np.zeros(state_batch.shape) - def get_g(state_batch, t_batch=None): + def get_g(state_batch: np.ndarray) -> np.ndarray: theta = state_batch[:, 2] g_x = np.zeros((state_batch.shape[0], 3, 2)) g_x[:, 0, 0] = np.cos(theta) @@ -244,53 +256,28 @@ def get_g(state_batch, t_batch=None): g_x[:, 2, 1] = 1.0 return g_x - elif self.env.dynamics_mode == 'Pendulum': - - def get_f(state_batch, t_batch=None): - f_x = np.zeros(state_batch.shape) - theta = state_batch[:, 0] - theta_dot = state_batch[:, 1] - f_x = np.array( - [ - -3*10/2*np.sin(theta+np.pi)*self.env.dt + theta, - theta_dot - 3*10/2*np.sin(theta+np.pi) - ] - ) - return f_x - - def get_g(state_batch, t_batch=None): - g_x = np.zeros((state_batch.shape[0], 2, 1)) - g_x[:, 0, 0] = 3*self.env.dt**2 - g_x[:, 1, 0] = 3*self.env.dt - return g_x - else: raise Exception('Unknown Dynamics mode.') return get_f, get_g - def get_state(self, obs): - """Given the observation, this function does the pre-processing necessary and returns the state. - - Parameters - ---------- - obs_batch : ndarray or torch.tensor - Environment observation. + def get_state(self, obs: np.ndarray) -> np.ndarray: + """ + Processes the raw observations from the environment and returns the corresponding state representation. - Returns - ------- - state_batch : ndarray or torch.tensor - State of the system. + Args: + obs (np.ndarray): The environment observations. 
+ Returns: + np.ndarray: The processed state of the system. """ - expand_dims = len(obs.shape) == 1 is_tensor = torch.is_tensor(obs) if is_tensor: dtype = obs.dtype device = obs.device - obs = to_numpy(obs) + obs = obs.cpu().numpy() if obs.is_cuda else obs.numpy() if expand_dims: obs = np.expand_dims(obs, 0) @@ -301,64 +288,29 @@ def get_state(self, obs): state_batch[:, 0] = obs[:, 0] state_batch[:, 1] = obs[:, 1] state_batch[:, 2] = theta - elif self.env.dynamics_mode == 'Pendulum': - theta = np.arctan2(obs[:, 1], obs[:, 0]) - theta_dot = obs[:, 2] - state_batch = np.zeros((obs.shape[0], 2)) - state_batch[:, 0] = theta - state_batch[:, 1] = theta_dot else: raise Exception('Unknown dynamics') if expand_dims: state_batch = state_batch.squeeze(0) - return to_tensor(state_batch, dtype, device) if is_tensor else state_batch - - def get_obs(self, state_batch): - """Given the state, this function returns it to an observation akin to the one obtained by calling env.step - - Parameters - ---------- - state : ndarray - Environment state batch of shape (batch_size, n_s) - - Returns - ------- - obs : ndarray - Observation batch of shape (batch_size, n_o) - - """ - - if self.env.dynamics_mode == 'Unicycle': - obs = np.zeros((state_batch.shape[0], 4)) - obs[:, 0] = state_batch[:, 0] - obs[:, 1] = state_batch[:, 1] - obs[:, 2] = np.cos(state_batch[:, 2]) - obs[:, 3] = np.sin(state_batch[:, 2]) - else: - raise Exception('Unknown dynamics') - return obs - - def append_transition(self, state_batch, u_batch, next_state_batch, t_batch=None): - """Estimates the disturbance from the current dynamics transition and adds it to buffer. 
- - Parameters - ---------- - state_batch : ndarray - shape (n_s,) or (batch_size, n_s) - u_batch : ndarray - shape (n_u,) or (batch_size, n_u) - next_state_batch : ndarray - shape (n_s,) or (batch_size, n_s) - t_batch : ndarray, optional - shape (1,) or (batch_size, 1) - - Returns - ------- - + if is_tensor: + return torch.tensor(state_batch, dtype=dtype, device=device) + return state_batch + + def append_transition( + self, + state_batch: np.ndarray, + u_batch: np.ndarray, + next_state_batch: np.ndarray, + ) -> None: + """Estimates the disturbance from the current dynamics transition and adds it to the buffer. + + Args: + state_batch (np.ndarray): The batch of current states, shape (n_s,) or (batch_size, n_s). + u_batch (np.ndarray): The batch of actions applied, shape (n_u,) or (batch_size, n_u). + next_state_batch (np.ndarray): The batch of next states, shape (n_s,) or (batch_size, n_s). """ - expand_dims = len(state_batch.shape) == 1 if expand_dims: @@ -366,71 +318,68 @@ def append_transition(self, state_batch, u_batch, next_state_batch, t_batch=None next_state_batch = np.expand_dims(next_state_batch, 0) u_batch = np.expand_dims(u_batch, 0) - u_batch = np.expand_dims(u_batch, -1) # for broadcasting batch matrix multiplication - disturbance_batch = (next_state_batch - state_batch - self.env.dt * (self.get_f(state_batch, t_batch) + (self.get_g(state_batch, t_batch) @ u_batch).squeeze(-1))) / self.env.dt + u_batch = np.expand_dims(u_batch, -1) + disturbance_batch = ( + next_state_batch + - state_batch + - self.env.dt + * (self.get_f(state_batch) + (self.get_g(state_batch) @ u_batch).squeeze(-1)) + ) / self.env.dt - # Append new data point (state, disturbance) to our dataset for i in range(state_batch.shape[0]): - - self.disturbance_history['state'][self.history_counter % self.max_history_count] = state_batch[i] - self.disturbance_history['disturbance'][self.history_counter % self.max_history_count] = disturbance_batch[i] - - # Increment how many data points we 
have + self.disturbance_history['state'][self.history_counter % self.max_history_count] = ( + state_batch[i] + ) + self.disturbance_history['disturbance'][ + self.history_counter % self.max_history_count + ] = disturbance_batch[i] self.history_counter += 1 - # Update GP models every max_history_count data points - if self.history_counter % (self.max_history_count/10) == 0: + if self.history_counter % (self.max_history_count // 10) == 0: self.fit_gp_model() - def fit_gp_model(self, training_iter=70): - """ - - Parameters - ---------- - training_iter : int - Number of training iterations for GP model. - - Returns - ------- + def fit_gp_model(self, training_iter: int = 70) -> None: + """Fits a Gaussian Process model to the disturbance data. + Args: + training_iter (int, optional): Number of training iterations for the GP model. Defaults to 70. """ - - if self.history_counter < self.max_history_count: # didn't fill the buffer yet - train_x = self.disturbance_history['state'][:self.history_counter] - train_y = self.disturbance_history['disturbance'][:self.history_counter] - else: # buffer filled, use all the data points + if self.history_counter < self.max_history_count: + train_x = self.disturbance_history['state'][: self.history_counter] + train_y = self.disturbance_history['disturbance'][: self.history_counter] + else: train_x = self.disturbance_history['state'] train_y = self.disturbance_history['disturbance'] - # Normalize Data train_x_std = np.std(train_x, axis=0) train_x_normalized = train_x / (train_x_std + 1e-8) train_y_std = np.std(train_y, axis=0) train_y_normalized = train_y / (train_y_std + 1e-8) - self.disturb_estimators = [] + self._disturb_estimators = [] for i in range(self.n_s): - # self.disturb_estimators.append(GPyDisturbanceEstimator(train_x, train_y[:, i])) - self.disturb_estimators.append(GPyDisturbanceEstimator(train_x_normalized, train_y_normalized[:, i], MAX_STD[self.env.dynamics_mode][i], device=self.device)) - 
self.disturb_estimators[i].train(training_iter) - - # track the data I last used to fit the GPs for saving purposes (need it to initialize before loading weights) - self.train_x = train_x - self.train_y = train_y - - def predict_disturbance(self, test_x): - """Predict the disturbance at the queried states using the GP models. - - Parameters - ---------- - test_x : ndarray or torch.tensor - shape(n_test, n_s) - Returns - ------- - means: ndarray or torch.tensor - Prediction means -- shape(n_test, n_s) - vars: ndarray or torch.tensor - Prediction variances -- shape(n_test, n_s) + self._disturb_estimators.append( + GPyDisturbanceEstimator( + train_x_normalized, + train_y_normalized[:, i], + MAX_STD[self.env.dynamics_mode][i], + device=self.device, + ), + ) + self._disturb_estimators[i].train(training_iter) + + self._train_x = train_x + self._train_y = train_y + + def predict_disturbance(self, test_x: np.ndarray) -> tuple: + """Predicts the disturbance at the queried states using the trained Gaussian Process models. + + Args: + test_x (np.ndarray): The state for which to predict disturbances, shape (n_test, n_s). + + Returns: + tuple: A tuple of arrays (means, variances) where means is the predicted mean disturbance + and variances is the corresponding variance, shape (n_test, n_s). 
""" is_tensor = torch.is_tensor(test_x) @@ -445,19 +394,18 @@ def predict_disturbance(self, test_x): test_x = np.expand_dims(test_x, axis=0) means = np.zeros(test_x.shape) - f_std = np.zeros(test_x.shape) # standard deviation + f_std = np.zeros(test_x.shape) - if self.disturb_estimators: - # Normalize - train_x_std = np.std(self.train_x, axis=0) - train_y_std = np.std(self.train_y, axis=0) + if self._disturb_estimators: + train_x_std = np.std(self._train_x, axis=0) + train_y_std = np.std(self._train_y, axis=0) test_x = test_x / train_x_std for i in range(self.n_s): - prediction_ = self.disturb_estimators[i].predict(test_x) + prediction_ = self._disturb_estimators[i].predict(test_x) means[:, i] = prediction_['mean'] * (train_y_std[i] + 1e-8) f_std[:, i] = np.sqrt(prediction_['f_var']) * (train_y_std[i] + 1e-8) - else: # zero-mean, max_sigma prior + else: f_std = np.ones(test_x.shape) for i in range(self.n_s): f_std[:, i] *= MAX_STD[self.env.dynamics_mode][i] @@ -466,33 +414,48 @@ def predict_disturbance(self, test_x): means = means.squeeze(0) f_std = f_std.squeeze(0) - return (to_tensor(means, dtype, device), to_tensor(f_std, dtype, device)) if is_tensor else (means, f_std) - - def load_disturbance_models(self, output): + return ( + (to_tensor(means, dtype, device), to_tensor(f_std, dtype, device)) + if is_tensor + else (means, f_std) + ) - if output is None: - return + def load_disturbance_models(self, save_dir: str, epoch: str) -> None: + """Loads the disturbance models and their training data. - self.disturb_estimators = [] - - weights = torch.load('{}/gp_models.pkl'.format(output), map_location=self.device) - self.train_x = torch.load('{}/gp_models_train_x.pkl'.format(output)) - self.train_y = torch.load('{}/gp_models_train_y.pkl'.format(output)) + Args: + save_dir (str): The directory where the model files are saved. + epoch (str): The epoch identifier used in the filenames to load the specific model checkpoint. 
+ """ + self._disturb_estimators = [] + weights = torch.load( + os.path.join(save_dir, f'gp_models_{epoch}.pkl'), + map_location=self.device, + ) + self._train_x = torch.load(os.path.join(save_dir, f'gp_models_train_x_{epoch}.pkl')) + self._train_y = torch.load(os.path.join(save_dir, f'gp_models_train_y_{epoch}.pkl')) for i in range(self.n_s): - self.disturb_estimators.append(GPyDisturbanceEstimator(self.train_x, self.train_y[:, i], MAX_STD[self.env.dynamics_mode][i], device=self.device)) - self.disturb_estimators[i].model.load_state_dict(weights[i]) - - def save_disturbance_models(self, output): - - if not self.disturb_estimators or self.train_x is None or self.train_y is None: - return - weights = [] - for i in range(len(self.disturb_estimators)): - weights.append(self.disturb_estimators[i].model.state_dict()) - torch.save(weights, '{}/gp_models.pkl'.format(output)) - # Also save data used to fit model (needed for initializing the model before loading weights) - torch.save(self.train_x, '{}/gp_models_train_x.pkl'.format(output)) - torch.save(self.train_y, '{}/gp_models_train_y.pkl'.format(output)) - - def seed(self, seed): - torch.manual_seed(seed) \ No newline at end of file + self._disturb_estimators.append( + GPyDisturbanceEstimator( + self._train_x, + self._train_y[:, i], + MAX_STD[self.env.dynamics_mode][i], + device=self.device, + ), + ) + self._disturb_estimators[i].model.load_state_dict(weights[i]) + + @property + def train_x(self) -> np.ndarray: + """Returns the training data input features used for the disturbance estimators.""" + return self._train_x + + @property + def train_y(self) -> np.ndarray: + """Returns the training data labels used for the disturbance estimators.""" + return self._train_y + + @property + def disturb_estimators(self) -> list[GPyDisturbanceEstimator]: + """Provides access to the list of trained disturbance estimator models.""" + return self._disturb_estimators diff --git a/omnisafe/common/utils.py b/omnisafe/common/utils.py 
index beee622e5..ec36fe157 100644 --- a/omnisafe/common/utils.py +++ b/omnisafe/common/utils.py @@ -1,182 +1,51 @@ -import math import numpy as np -import os import torch -from torch.autograd import Variable -USE_CUDA = torch.cuda.is_available() +def to_numpy(x: torch.Tensor) -> np.ndarray: + """Convert a torch tensor to a numpy array. -def prRed(prt): print("\033[91m {}\033[00m".format(prt)) + Args: + x (torch.Tensor): A torch tensor to be converted. - -def prGreen(prt): print("\033[92m {}\033[00m".format(prt)) - - -def prYellow(prt): print("\033[93m {}\033[00m".format(prt)) - - -def prLightPurple(prt): print("\033[94m {}\033[00m".format(prt)) - - -def prPurple(prt): print("\033[95m {}\033[00m".format(prt)) - - -def prCyan(prt): print("\033[96m {}\033[00m".format(prt)) - - -def prLightGray(prt): print("\033[97m {}\033[00m".format(prt)) - - -def prBlack(prt): print("\033[98m {}\033[00m".format(prt)) - - -def mat_to_euler_2d(rot_mat): + Returns: + np.ndarray: A numpy array representation of the input tensor. """ - rot_mat has shape: - [[c -s 0], - [s c 0], - [0 0 1]] - """ - - theta = np.arcsin(rot_mat[1, 0]) - return theta + return x.cpu().detach().double().numpy() -def euler_to_mat_2d(theta_batch): - s = np.sin(theta_batch) - c = np.cos(theta_batch) - Rs = np.zeros((theta_batch.shape[0], 2, 2)) - Rs[:, 0, 0] = c - Rs[:, 0, 1] = -s - Rs[:, 1, 0] = s - Rs[:, 1, 1] = c - return Rs +def to_tensor( + x: np.ndarray, + dtype: torch.dtype, + device: torch.device, + requires_grad: bool = False, +) -> torch.Tensor: + """Convert a numpy array to a torch tensor of specified type and device. -def to_numpy(x): - # convert torch tensor to numpy array - return x.cpu().detach().double().numpy() + Args: + x (np.ndarray): A numpy array to be converted. + dtype (torch.dtype): The desired data type for the tensor. + device (torch.device): The device to store the tensor on. + requires_grad (bool): If True, gradients will be computed for operations involving this tensor. 
-def to_tensor(x, dtype, device, requires_grad=False): - # convert numpy array to torch tensor + Returns: + torch.Tensor: A torch tensor representation of the input array. + """ if type(x).__module__ != 'numpy': return x return torch.from_numpy(x).type(dtype).to(device).requires_grad_(requires_grad) -def scale_action(action, action_lb, action_ub, device=None): - - act_k = (action_ub - action_lb) / 2. - act_b = (action_ub + action_lb) / 2. - return act_k * action + act_b - - -def soft_update(target, source, tau): - for target_param, param in zip(target.parameters(), source.parameters()): - target_param.data.copy_( - target_param.data * (1.0 - tau) + param.data * tau - ) - - -def hard_update(target, source): - for target_param, param in zip(target.parameters(), source.parameters()): - target_param.data.copy_(param.data) - -def create_log_gaussian(mean, log_std, t): - quadratic = -((0.5 * (t - mean) / (log_std.exp())).pow(2)) - l = mean.shape - log_z = log_std - z = l[-1] * math.log(2 * math.pi) - log_p = quadratic.sum(dim=-1) - log_z.sum(dim=-1) - 0.5 * z - return log_p +def sort_vertices_cclockwise(vertices: np.ndarray) -> np.ndarray: + """Sort vertices of a 2D convex polygon in counter-clockwise direction. + Args: + vertices (np.ndarray): An array of shape (n_v, 2) where n_v is the number of vertices. -def logsumexp(inputs, dim=None, keepdim=False): - if dim is None: - inputs = inputs.view(-1) - dim = 0 - s, _ = torch.max(inputs, dim=dim, keepdim=True) - outputs = s + (inputs - s).exp().sum(dim=dim, keepdim=True).log() - if not keepdim: - outputs = outputs.squeeze(dim) - return outputs - - -def get_output_folder(parent_dir, env_name): - """Return save folder. - - Assumes folders in the parent_dir have suffix -run{run - number}. Finds the highest run number and sets the output folder - to that number + 1. This is just convenient so that if you run the - same script multiple times tensorboard can plot all of the results - on the same plots with different names. 
- - Parameters - ---------- - parent_dir: str - Path of the directory containing all experiment runs. - - Returns - ------- - parent_dir/run_dir - Path to this run's save directory. - """ - os.makedirs(parent_dir, exist_ok=True) - experiment_id = 0 - for folder_name in os.listdir(parent_dir): - if not os.path.isdir(os.path.join(parent_dir, folder_name)): - continue - try: - folder_name = int(folder_name.split('-run')[-1]) - if folder_name > experiment_id: - experiment_id = folder_name - except: - pass - experiment_id += 1 - - parent_dir = os.path.join(parent_dir, env_name) - parent_dir = parent_dir + '-run{}'.format(experiment_id) - os.makedirs(parent_dir, exist_ok=True) - return parent_dir - - -def get_wrapped_policy(agent, cbf_wrapper, dynamics_model, compensator=None, warmup=False, action_space=None, - policy_eval=False): - - def wrapped_policy(observation): - - if warmup and action_space: - action = action_space.sample() # Sample random action - else: - action, _ = agent.select_action(observation, evaluate=policy_eval) # Sample action from policy - - if compensator: - action_comp = compensator(observation) - else: - action_comp = 0 - state = dynamics_model.get_state(observation) - disturb_mean, disturb_std = dynamics_model.predict_disturbance(state) - action_safe = cbf_wrapper.get_safe_action(state, action + action_comp, disturb_mean, disturb_std) - # print('state = {}, action = {}, action_comp = {}, u_safe = {}'.format(state, action, action_comp, u_safe)) - return action + action_comp + action_safe - - return wrapped_policy - -def sort_vertices_cclockwise(vertices): - """ Function used to sort vertices of 2D convex polygon in counter clockwise direction. - - Parameters - ---------- - vertices : numpy.ndarray - Array of size (n_v, 2) where n_v is the number of vertices and d is the dimension of the space - - Returns - ------- - sorted_vertices : numpy.ndarray - Array of size (n_v, 2) of the vertices sorted in counter-clockwise direction. 
+ Returns: + np.ndarray: An array of vertices sorted in counter-clockwise direction. """ - - assert vertices.shape[1] == 2, "Vertices must each have dimension 2, got {}".format(vertices.shape[1]) + assert vertices.shape[1] == 2, f'Vertices must each have dimension 2, got {vertices.shape[1]}' # Sort vertices polygon_center = vertices.sum(axis=0, keepdims=True) / vertices.shape[0] # (1, d) @@ -184,32 +53,3 @@ def sort_vertices_cclockwise(vertices): thetas = np.arctan2(rel_vecs[:, 1], rel_vecs[:, 0]) idxs = np.argsort(thetas) return vertices[idxs, :] - -def get_polygon_normals(vertices): - """ - - Parameters - ---------- - vertices : numpy.ndarray - Array of size (n_v, 2) where n_v is the number of 2D vertices. - Returns - ------- - normals : numpy.ndarray - Array of size (n_v, 2) where each row i is the 2D normal vector of the line from vertices_sorted[i] - vertices_sorted[i+1] - - centers : numpy.ndarary - Array of size (n_v, 2) where each row i is the 2D center point of the segment from vertices_sorted[i] to vertices_sorted[i+1] - """ - - sorted_vertices = sort_vertices_cclockwise(vertices) # (n_v, 2) - diffs = np.diff(sorted_vertices, axis=0, append=sorted_vertices[[0]]) # (n_v, 2) at row i contains vector from v_i to v_i+1 - - # Compute Normals (rotate each diff by -90 degrees) - diffs = np.diff(sorted_vertices, axis=0, append=sorted_vertices[[0]]) # (n_v, 2) at row i contains vector from v_i to v_i+1 - normals = np.array([diffs[:, 1], -diffs[:, 0]]).transpose() - normals = normals / np.linalg.norm(normals) - # Compute Centers - centers = (diffs + 2*vertices) / 2.0 - return normals, centers - - diff --git a/omnisafe/configs/off-policy/DDPGCBF.yaml b/omnisafe/configs/off-policy/DDPGCBF.yaml index 1579aa658..3eec4dced 100644 --- a/omnisafe/configs/off-policy/DDPGCBF.yaml +++ b/omnisafe/configs/off-policy/DDPGCBF.yaml @@ -29,7 +29,7 @@ defaults: # total number of steps to train total_steps: 80_000 # number of evaluate episodes - eval_episodes: 0 + eval_episodes: 1 # 
algorithm configurations algo_cfgs: # number of steps to update the policy @@ -77,7 +77,7 @@ defaults: # use tensorboard for logging use_tensorboard: True # save model frequency - save_model_freq: 100 + save_model_freq: 20 # save logger path log_dir: "./runs" # save model path @@ -105,7 +105,7 @@ defaults: # Size of hidden layers hidden_sizes: [400, 300] # Activation function - + activation: relu # The learning rate of Critic network lr: 0.001 diff --git a/omnisafe/configs/off-policy/SACRCBF.yaml b/omnisafe/configs/off-policy/SACRCBF.yaml index bb133e56c..53c5e5a17 100644 --- a/omnisafe/configs/off-policy/SACRCBF.yaml +++ b/omnisafe/configs/off-policy/SACRCBF.yaml @@ -27,13 +27,13 @@ defaults: # number of parallel agent, similar to a3c parallel: 1 # total number of steps to train - total_steps: 80_000 + total_steps: 200000 # number of evaluate episodes - eval_episodes: 0 + eval_episodes: 1 # algorithm configurations algo_cfgs: # number of steps to update the policy - steps_per_epoch: 200 + steps_per_epoch: 1000 # number of steps per sample update_cycle: 1 # number of iterations to update the policy @@ -93,7 +93,7 @@ defaults: # use tensorboard for logging use_tensorboard: True # save model frequency - save_model_freq: 100 + save_model_freq: 40 # save logger path log_dir: "./runs" # save model path @@ -126,23 +126,9 @@ defaults: lr: 0.0003 # Dynamics model configurations dynamics_model_cfgs: - # The max number of episodes updateing GP models + # The max number of episodes updating GP models gp_max_episodes: 100 # The size of gp model gp_model_size: 2000 # Whether to use the action compensator use_compensator: False - -Pendulum-v1: - # algorithm configurations - algo_cfgs: - # Actor perdorm random action before `start_learning_steps` steps - start_learning_steps: 0 - # control barrier function configurations - cbf_cfgs: - # gamma of control barrier certificate. 
- gamma_b: 0.5 - # confidence parameter desired - k_d: 1.5 - # environment dynamics coefficient - l_p: 0.03 \ No newline at end of file diff --git a/omnisafe/configs/on-policy/TRPO.yaml b/omnisafe/configs/on-policy/TRPO.yaml index a8d60878b..ab025a391 100644 --- a/omnisafe/configs/on-policy/TRPO.yaml +++ b/omnisafe/configs/on-policy/TRPO.yaml @@ -155,4 +155,4 @@ Pendulum-v1: # hidden layer sizes hidden_sizes: [64, 64] # activation function - activation: relu \ No newline at end of file + activation: relu diff --git a/omnisafe/configs/on-policy/TRPOCBF.yaml b/omnisafe/configs/on-policy/TRPOCBF.yaml index 74922c9d2..8fecee0d4 100644 --- a/omnisafe/configs/on-policy/TRPOCBF.yaml +++ b/omnisafe/configs/on-policy/TRPOCBF.yaml @@ -136,4 +136,4 @@ defaults: # learning rate lr: 0.01 # number of iterations to update the compensator - update_iters: 1 \ No newline at end of file + update_iters: 1 diff --git a/omnisafe/envs/__init__.py b/omnisafe/envs/__init__.py index ebeb6af4e..c21b1973c 100644 --- a/omnisafe/envs/__init__.py +++ b/omnisafe/envs/__init__.py @@ -15,12 +15,14 @@ """Environment API for OmniSafe.""" from omnisafe.envs import classic_control +from omnisafe.envs.barrier_function_env import BarrierFunctionEnv from omnisafe.envs.core import CMDP, env_register, make, support_envs from omnisafe.envs.crabs_env import CRABSEnv from omnisafe.envs.custom_env import CustomEnv from omnisafe.envs.meta_drive_env import SafetyMetaDriveEnv from omnisafe.envs.barrier_function_env import BarrierFunctionEnv from omnisafe.envs.mujoco_env import MujocoEnv +from omnisafe.envs.robust_barrier_function_env import RobustBarrierFunctionEnv from omnisafe.envs.safety_gymnasium_env import SafetyGymnasiumEnv from omnisafe.envs.safety_gymnasium_modelbased import SafetyGymnasiumModelBased from omnisafe.envs.safety_isaac_gym_env import SafetyIsaacGymEnv diff --git a/omnisafe/envs/barrier_function_env.py b/omnisafe/envs/barrier_function_env.py index f8d0d964c..d664e749b 100644 --- 
a/omnisafe/envs/barrier_function_env.py +++ b/omnisafe/envs/barrier_function_env.py @@ -21,24 +21,26 @@ import gymnasium import numpy as np import torch - from gymnasium import spaces + +from omnisafe.common.logger import Logger from omnisafe.envs.core import CMDP, env_register from omnisafe.typing import Box -# @env_register +@env_register class BarrierFunctionEnv(CMDP): """Interface of control barrier function-based environments. - - .. warning:: - Since environments based on control barrier functions require special judgment and control of environmental dynamics, + + .. warning:: + Since environments based on control barrier functions require special judgment and control of environmental dynamics, they do not support the use of vectorized environments for parallelization. Attributes: need_auto_reset_wrapper (bool): Whether to use auto reset wrapper. need_time_limit_wrapper (bool): Whether to use time limit wrapper. """ + need_auto_reset_wrapper = True need_time_limit_wrapper = False _support_envs: ClassVar[list[str]] = [ @@ -70,7 +72,7 @@ def __init__( super().__init__(env_id) self._env_id = env_id if num_envs == 1: - self._env = gymnasium.make(id=env_id, autoreset=False, **kwargs) + self._env = gymnasium.make(id=env_id, autoreset=False) self._env_specific_setting() assert isinstance(self._env.action_space, Box), 'Only support Box action space.' assert isinstance( @@ -82,21 +84,26 @@ def __init__( else: raise NotImplementedError('Only support num_envs=1 now.') self._device = torch.device(device) - + self._episodic_violation = [] self._num_envs = num_envs self._metadata = self._env.metadata + self.env_spec_log = {'Metrics/Max_angle_violation': 0.0} - def _env_specific_setting(self): + def _env_specific_setting(self) -> None: """Execute some specific setting for environments. - - Some algorithms based on control barrier functions have made fine-tuning adjustments to the environment. 
+ + Some algorithms based on control barrier functions have made fine-tuning adjustments to the environment. We have organized these adjustments and encapsulated them in this function. """ if self._env_id == 'Pendulum-v1': - self._env.unwrapped.max_torque = 15. - self._env.unwrapped.max_speed = 60. - self._env.unwrapped.action_space = spaces.Box(low=-self._env.unwrapped.max_torque, high=self._env.unwrapped.max_torque, shape=(1,)) - high = np.array([1., 1., self._env.unwrapped.max_speed]) + self._env.unwrapped.max_torque = 15.0 + self._env.unwrapped.max_speed = 60.0 + self._env.unwrapped.action_space = spaces.Box( + low=-self._env.unwrapped.max_torque, + high=self._env.unwrapped.max_torque, + shape=(1,), + ) + high = np.array([1.0, 1.0, self._env.unwrapped.max_speed]) self._env.unwrapped.observation_space = spaces.Box(low=-high, high=high) self._env.dt = 0.05 self._env.dynamics_mode = 'Pendulum' @@ -139,6 +146,7 @@ def step( for x in (obs, reward, terminated, truncated) ) cost = torch.abs(torch.atan2(obs[1], obs[0])).to(self._device) + self._episodic_violation.append(cost) if 'final_observation' in info: info['final_observation'] = np.array( @@ -155,6 +163,20 @@ def step( return obs, reward, cost, terminated, truncated, info + def spec_log(self, logger: Logger) -> None: + """Log specific environment into logger. + + Max angle violation in one episode. + + .. note:: + This function will be called after each episode. + + Args: + logger (Logger): The logger to use for logging. 
+ """ + logger.store({'Metrics/Max_angle_violation': max(self._episodic_violation)}) + self._episodic_violation = [] + def reset( self, seed: int | None = None, @@ -172,7 +194,7 @@ def reset( """ obs, info = self._env.reset(seed=seed, options=options) if self._env_id == 'Pendulum-v1': - while (self._env.unwrapped.state[0] > 1.0 or self._env.unwrapped.state[0] < -1.0): + while self._env.unwrapped.state[0] > 1.0 or self._env.unwrapped.state[0] < -1.0: obs, info = self._env.reset(options=options) return torch.as_tensor(obs, dtype=torch.float32, device=self._device), info @@ -184,14 +206,6 @@ def set_seed(self, seed: int) -> None: """ self.reset(seed=seed) - def sample_action(self) -> torch.Tensor: - """Sample a random action. - - Returns: - A random action. - """ - return torch.normal(torch.zeros(self.action_space.shape), torch.ones(self.action_space.shape)) - def render(self) -> Any: """Render the environment. @@ -205,5 +219,5 @@ def close(self) -> None: self._env.close() @property - def unwrapped(self): - return self._env.unwrapped \ No newline at end of file + def unwrapped(self) -> gymnasium.Env: + return self._env.unwrapped diff --git a/omnisafe/envs/robust_barrier_function_env.py b/omnisafe/envs/robust_barrier_function_env.py index 12e680b86..1f1c10418 100644 --- a/omnisafe/envs/robust_barrier_function_env.py +++ b/omnisafe/envs/robust_barrier_function_env.py @@ -18,33 +18,33 @@ from typing import Any, ClassVar +import gymnasium import numpy as np import torch +from gymnasium import spaces -import gymnasium from omnisafe.envs.core import CMDP, env_register -from omnisafe.typing import Box -from gymnasium import spaces from omnisafe.envs.unicycle_env import UnicycleEnv +from omnisafe.typing import Box @env_register class RobustBarrierFunctionEnv(CMDP): """Interface of control barrier function-based environments. - - .. warning:: - Since environments based on control barrier functions require special judgment and control of environmental dynamics, + + .. 
warning:: + Since environments based on control barrier functions require special judgment and control of environmental dynamics, they do not support the use of vectorized environments for parallelization. Attributes: need_auto_reset_wrapper (bool): Whether to use auto reset wrapper. need_time_limit_wrapper (bool): Whether to use time limit wrapper. """ + need_auto_reset_wrapper = True need_time_limit_wrapper = False _support_envs: ClassVar[list[str]] = [ 'Unicycle', - 'Pendulum-v1', ] def __init__( @@ -74,9 +74,6 @@ def __init__( if num_envs == 1: if self._env_id == 'Unicycle': self._env = UnicycleEnv() - elif self._env_id == 'Pendulum-v1': - self._env = gymnasium.make(id=env_id, autoreset=False, **kwargs) - self._env_specific_setting() else: raise NotImplementedError('Only support Unicycle now.') assert isinstance(self._env.action_space, Box), 'Only support Box action space.' @@ -93,19 +90,6 @@ def __init__( self._num_envs = num_envs self._metadata = self._env.metadata - def _env_specific_setting(self): - """Execute some specific setting for environments. - - Some algorithms based on control barrier functions have made fine-tuning adjustments to the environment. - We have organized these adjustments and encapsulated them in this function. - """ - if self._env_id == 'Pendulum-v1': - self._env.unwrapped.max_torque = 15. - self._env.unwrapped.max_speed = 60. - self._env.unwrapped.action_space = spaces.Box(low=-self._env.unwrapped.max_torque, high=self._env.unwrapped.max_torque, shape=(1,)) - high = np.array([1., 1., self._env.unwrapped.max_speed]) - self._env.unwrapped.observation_space = spaces.Box(low=-high, high=high) - def step( self, action: torch.Tensor, @@ -136,23 +120,13 @@ def step( truncated: Whether the episode has been truncated due to a time limit. info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). 
""" - if self._env_id == 'Unicycle': - obs, reward, cost, terminated, truncated, info = self._env.step( - action.detach().cpu().numpy(), - ) - obs, reward, cost, terminated, truncated = ( - torch.as_tensor(x, dtype=torch.float32, device=self._device) - for x in (obs, reward, cost, terminated, truncated) - ) - elif self._env_id == 'Pendulum-v1': - obs, reward, terminated, truncated, info = self._env.step( - action.detach().cpu().numpy(), - ) - obs, reward, terminated, truncated = ( - torch.as_tensor(x, dtype=torch.float32, device=self._device) - for x in (obs, reward, terminated, truncated) - ) - cost = torch.abs(torch.atan2(obs[1], obs[0])).to(self._device) + obs, reward, cost, terminated, truncated, info = self._env.step( + action.detach().cpu().numpy(), + ) + obs, reward, cost, terminated, truncated = ( + torch.as_tensor(x, dtype=torch.float32, device=self._device) + for x in (obs, reward, cost, terminated, truncated) + ) if 'final_observation' in info: info['final_observation'] = np.array( [ @@ -184,9 +158,6 @@ def reset( info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). """ obs, info = self._env.reset(seed=seed, options=options) - if self._env_id == 'Pendulum-v1': - while (self._env.unwrapped.state[0] > 1.0 or self._env.unwrapped.state[0] < -1.0): - obs, info = self._env.reset(options=options) return torch.as_tensor(obs, dtype=torch.float32, device=self._device), info def set_seed(self, seed: int) -> None: @@ -203,7 +174,10 @@ def sample_action(self) -> torch.Tensor: Returns: A random action. """ - return torch.normal(torch.zeros(self.action_space.shape), torch.ones(self.action_space.shape)) + return torch.normal( + torch.zeros(self.action_space.shape), + torch.ones(self.action_space.shape), + ) def render(self) -> Any: """Render the environment. 
@@ -216,9 +190,6 @@ def render(self) -> Any: def close(self) -> None: """Close the environment.""" self._env.close() - - def __getattr__(self, name): - try: - return getattr(self._env, name) - except AttributeError: - raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") + + def __getattr__(self, name: str) -> Any: + return getattr(self._env, name) diff --git a/omnisafe/envs/unicycle_env.py b/omnisafe/envs/unicycle_env.py index fb16394a5..4fca58eed 100644 --- a/omnisafe/envs/unicycle_env.py +++ b/omnisafe/envs/unicycle_env.py @@ -1,33 +1,41 @@ -import numpy as np +from __future__ import annotations + +from collections.abc import Iterable +from typing import Any, Callable + import gymnasium as gym +import numpy as np from gymnasium import spaces -from collections.abc import Iterable -def to_pixel(meas_cm, shift=0): +def to_pixel(meas_cm: list[float] | float, shift: int = 0) -> float: + """Convert measurements from centimeters to pixels. + Args: + meas_cm (list[float] | float): A single measurement or a list of measurements in centimeters. + shift (int, optional): An integer value that is added to the converted measurement(s). Default is 0. + + Returns: + float | np.ndarray: The measurement converted to pixels. 
+ """ if isinstance(meas_cm, Iterable): return 1.5 * 37.795 * meas_cm + np.array(shift) return 1.5 * 37.795 * meas_cm + shift + class UnicycleEnv(gym.Env): """Custom Environment that follows SafetyGym interface""" - metadata = {'render.modes': ['human']} + def __init__(self) -> None: - def __init__(self, obs_config='default'): - - super(UnicycleEnv, self).__init__() + super().__init__() self.dynamics_mode = 'Unicycle' - # Define action and observation space - # They must be gym.spaces objects - # Example when using discrete actions: self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,)) self.safe_action_space = spaces.Box(low=-2.5, high=2.5, shape=(2,)) self.observation_space = spaces.Box(low=-1e10, high=1e10, shape=(7,)) - self.bds = np.array([[-3., -3.], [3., 3.]]) + self.bds = np.array([[-3.0, -3.0], [3.0, 3.0]]) self.dt = 0.02 self.max_episode_steps = 1000 @@ -36,9 +44,11 @@ def __init__(self, obs_config='default'): # Initialize Env self.state = None self.episode_step = 0 - self.initial_state = np.array([[-2.5, -2.5, 0.0], [-2.5, 2.5, 0.0], [-2.5, 0.0, 0.0], [2.5, -2.5, np.pi/2]]) + self.initial_state = np.array( + [[-2.5, -2.5, 0.0], [-2.5, 2.5, 0.0], [-2.5, 0.0, 0.0], [2.5, -2.5, np.pi / 2]], + ) self.goal_pos = np.array([2.5, 2.5]) - self.rand_init = False # Random Initial State + self.rand_init = False self.reset() @@ -49,148 +59,141 @@ def __init__(self, obs_config='default'): self.disturb_covar = np.diag([0.005, 0.005, 0.05]) * 20 # Build Hazards - self.obs_config = obs_config self.hazards = [] - if obs_config == 'default': # default - self.hazards.append({'type': 'circle', 'radius': 0.6, 'location': 1.5*np.array([0., 0.])}) - self.hazards.append({'type': 'circle', 'radius': 0.6, 'location': 1.5*np.array([-1., 1.])}) - self.hazards.append({'type': 'circle', 'radius': 0.6, 'location': 1.5*np.array([-1., -1.])}) - self.hazards.append({'type': 'circle', 'radius': 0.6, 'location': 1.5*np.array([1., -1.])}) - self.hazards.append({'type': 'circle', 
'radius': 0.6, 'location': 1.5*np.array([1., 1.])}) - elif obs_config == 'test': - # self.build_hazards(obs_config) - self.hazards.append({'type': 'polygon', 'vertices': 0.6*np.array([[-1., -1.], [1., -1], [1., 1.], [-1., 1.]])}) - self.hazards[-1]['vertices'][:, 0] += 0.5 - self.hazards[-1]['vertices'][:, 1] -= 0.5 - self.hazards.append({'type': 'circle', 'radius': 0.6, 'location': 1.5*np.array([1., 1.])}) - self.hazards.append( - {'type': 'polygon', 'vertices': np.array([[0.9, 0.9], [2.1, 2.1], [2.1, 0.9]])}) - else: - n_hazards = 6 - hazard_radius = 0.6 - self.get_random_hazard_locations(n_hazards, hazard_radius) + + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([0.0, 0.0])}, + ) + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([-1.0, 1.0])}, + ) + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([-1.0, -1.0])}, + ) + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([1.0, -1.0])}, + ) + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([1.0, 1.0])}, + ) # Viewer self.viewer = None - - def step(self, action): - """Organize the observation to understand what's going on - - Parameters - ---------- - action : ndarray - Action that the agent takes in the environment - - Returns - ------- - new_obs : ndarray - The new observation with the following structure: - [pos_x, pos_y, cos(theta), sin(theta), xdir2goal, ydir2goal, dist2goal] - + def step( + self, + action: np.ndarray, + ) -> tuple[np.ndarray, float, float, bool, bool, dict[str, Any]]: + """ + Advance the environment state based on the action taken by the agent. + + Parameters: + action(np.ndarray): Control action taken by the agent. + + Returns: + A tuple containing: + - new_obs : np.ndarray, the new observation structured as [pos_x, pos_y, cos(theta), sin(theta), xdir2goal, ydir2goal, dist2goal]. 
+ - reward : float, reward received after taking the action. + - cost : float, cost incurred after taking the action. + - terminated : bool, whether the episode has terminated. + - truncated : bool, whether the episode was truncated. + - info : dict, additional information about the environment's state. """ - action = np.clip(action, -1.0, 1.0) state, reward, cost, terminated, truncated, info = self._step(action) return self.get_obs(), reward, cost, terminated, truncated, info - def _step(self, action): + def _step(self, action: np.ndarray) -> tuple: """ - - Parameters - ---------- - action - - Returns - ------- - state : ndarray - New internal state of the agent. - reward : float - Reward collected during this transition. - terminated : bool - Whether the episode terminated. - info : dict - Additional info relevant to the environment. + Update the internal state based on the action, considering dynamics and disturbances. + + Parameters: + action(np.ndarray): Control action taken by the agent. + + Returns: + A tuple containing: + - state : np.ndarray, new internal state of the agent. + - reward : float, reward collected during this transition. + - cost : float, cost incurred during this transition. + - terminated : bool, whether the episode has terminated. + - truncated : bool, whether the episode was truncated due to reaching a step limit. + - info : dict, additional information relevant to the environment. 
""" - - # Start with our prior for continuous time system x' = f(x) + g(x)u self.state += self.dt * (self.get_f(self.state) + self.get_g(self.state) @ action) - self.state -= self.dt * 0.1 * self.get_g(self.state) @ np.array([np.cos(self.state[2]), 0]) #* np.random.multivariate_normal(self.disturb_mean, self.disturb_covar, 1).squeeze() + self.state -= self.dt * 0.1 * self.get_g(self.state) @ np.array([np.cos(self.state[2]), 0]) self.episode_step += 1 - info = dict() - dist_goal = self._goal_dist() - reward = (self.last_goal_dist - dist_goal) # -1e-3 * dist_goal + reward = self.last_goal_dist - dist_goal self.last_goal_dist = dist_goal - # Check if goal is met terminated = False if self.goal_met(): - info['goal_met'] = True reward += self.reward_goal terminated = True truncated = self.episode_step >= self.max_episode_steps - # Include constraint cost in reward (only during training, i.e. obs_config=='default') - if self.obs_config == 'default': - info['cost'] = 0 - for hazard in self.hazards: - if hazard['type'] == 'circle': # They should all be circles if 'default' - info['cost'] += 0.1 * (np.sum((self.state[:2] - hazard['location']) ** 2) < hazard['radius'] ** 2) - return self.state, reward, info['cost'], terminated, truncated, info - - def goal_met(self): - """Return true if the current goal is met this step + cost = 0.0 + for hazard in self.hazards: + if hazard['type'] == 'circle': + cost += 0.1 * ( + np.sum((self.state[:2] - hazard['location']) ** 2) < hazard['radius'] ** 2 + ) - Returns - ------- - goal_met : bool - True if the goal condition is met. + return self.state, reward, cost, terminated, truncated, {} + def goal_met(self) -> bool: """ + Check if the current goal has been met in this step. + Returns: + True if the agent has reached the goal, False otherwise. + """ return np.linalg.norm(self.state[:2] - self.goal_pos) <= self.goal_size - def reset(self, seed=None, options=None): - """ Reset the state of the environment to an initial state. 
- - Returns - ------- - observation : ndarray - Next observation. + def reset(self, seed: int | None = None, options: dict | None = None) -> tuple: + """ + Reset the environment to an initial state. + + Parameters: + seed : int, optional + Seed for random number generator. + options : dict, optional + Additional options to customize the environment reset. + + Returns: + A tuple containing: + - observation : np.ndarray, the first observation after reset. + - info : dict, additional information about the reset state. """ - self.episode_step = 0 - # Re-initialize state if self.rand_init: self.state = np.copy(self.initial_state[np.random.randint(self.initial_state.shape[0])]) else: self.state = np.copy(self.initial_state[0]) - # Re-initialize last goal dist self.last_goal_dist = self._goal_dist() - return self.get_obs(), dict() + return self.get_obs(), {} - def render(self, mode='human', close=False): + def render(self, mode: str = 'human') -> np.ndarray: """Render the environment to the screen - Parameters - ---------- + Parameters:--- mode : str close : bool - Returns - ------- + Returns: """ if mode != 'human' and mode != 'rgb_array': rel_loc = self.goal_pos - self.state[:2] theta_error = np.arctan2(rel_loc[1], rel_loc[0]) - self.state[2] - print('Ep_step = {}, \tState = {}, \tDist2Goal = {}, alignment_error = {}'.format(self.episode_step, self.state, self._goal_dist(), theta_error)) + print( + f'Ep_step = {self.episode_step}, \tState = {self.state}, \tDist2Goal = {self._goal_dist()}, alignment_error = {theta_error}', + ) screen_width = 600 screen_height = 400 @@ -203,24 +206,52 @@ def render(self, mode='human', close=False): obstacles = [] for i in range(len(self.hazards)): if self.hazards[i]['type'] == 'circle': - obstacles.append(pyglet_rendering.make_circle(radius=to_pixel(self.hazards[i]['radius'], shift=0), filled=True)) - obs_trans = pyglet_rendering.Transform(translation=(to_pixel(self.hazards[i]['location'][0], shift=screen_width/2), 
to_pixel(self.hazards[i]['location'][1], shift=screen_height/2))) + obstacles.append( + pyglet_rendering.make_circle( + radius=to_pixel(self.hazards[i]['radius'], shift=0), + filled=True, + ), + ) + obs_trans = pyglet_rendering.Transform( + translation=( + to_pixel(self.hazards[i]['location'][0], shift=screen_width / 2), + to_pixel(self.hazards[i]['location'][1], shift=screen_height / 2), + ), + ) obstacles[i].set_color(1.0, 0.0, 0.0) obstacles[i].add_attr(obs_trans) elif self.hazards[i]['type'] == 'polygon': - obstacles.append(pyglet_rendering.make_polygon(to_pixel(self.hazards[i]['vertices'], shift=[screen_width/2, screen_height/2]), filled=True)) + obstacles.append( + pyglet_rendering.make_polygon( + to_pixel( + self.hazards[i]['vertices'], + shift=[screen_width / 2, screen_height / 2], + ), + filled=True, + ), + ) self.viewer.add_geom(obstacles[i]) # Make Goal goal = pyglet_rendering.make_circle(radius=to_pixel(0.1, shift=0), filled=True) - goal_trans = pyglet_rendering.Transform(translation=(to_pixel(self.goal_pos[0], shift=screen_width/2), to_pixel(self.goal_pos[1], shift=screen_height/2))) + goal_trans = pyglet_rendering.Transform( + translation=( + to_pixel(self.goal_pos[0], shift=screen_width / 2), + to_pixel(self.goal_pos[1], shift=screen_height / 2), + ), + ) goal.add_attr(goal_trans) goal.set_color(0.0, 0.5, 0.0) self.viewer.add_geom(goal) # Make Robot self.robot = pyglet_rendering.make_circle(radius=to_pixel(0.1), filled=True) - self.robot_trans = pyglet_rendering.Transform(translation=(to_pixel(self.state[0], shift=screen_width/2), to_pixel(self.state[1], shift=screen_height/2))) + self.robot_trans = pyglet_rendering.Transform( + translation=( + to_pixel(self.state[0], shift=screen_width / 2), + to_pixel(self.state[1], shift=screen_height / 2), + ), + ) self.robot_trans.set_rotation(self.state[2]) self.robot.add_attr(self.robot_trans) self.robot.set_color(0.5, 0.5, 0.8) @@ -234,20 +265,18 @@ def render(self, mode='human', close=False): if self.state 
is None: return None - self.robot_trans.set_translation(to_pixel(self.state[0], shift=screen_width/2), to_pixel(self.state[1], shift=screen_height/2)) + self.robot_trans.set_translation( + to_pixel(self.state[0], shift=screen_width / 2), + to_pixel(self.state[1], shift=screen_height / 2), + ) self.robot_trans.set_rotation(self.state[2]) - return self.viewer.render(return_rgb_array=mode == "rgb_array") - - def get_obs(self): - """Given the state, this function returns it to an observation akin to the one obtained by calling env.step + return self.viewer.render(return_rgb_array=mode == 'rgb_array') - Parameters - ---------- + def get_obs(self) -> np.ndarray: + """Given the state, this function returns corresponding observation. - Returns - ------- - observation : ndarray + Returns: Observation: [pos_x, pos_y, cos(theta), sin(theta), xdir2goal, ydir2goal, exp(-dist2goal)] """ @@ -255,36 +284,43 @@ def get_obs(self): goal_dist = np.linalg.norm(rel_loc) goal_compass = self.obs_compass() # compass to the goal - return np.array([self.state[0], self.state[1], np.cos(self.state[2]), np.sin(self.state[2]), goal_compass[0], goal_compass[1], np.exp(-goal_dist)]) - - def _get_dynamics(self): - """Get affine CBFs for a given environment. - - Parameters - ---------- - - Returns - ------- - get_f : callable - Drift dynamics of the continuous system x' = f(x) + g(x)u - get_g : callable - Control dynamics of the continuous system x' = f(x) + g(x)u + return np.array( + [ + self.state[0], + self.state[1], + np.cos(self.state[2]), + np.sin(self.state[2]), + goal_compass[0], + goal_compass[1], + np.exp(-goal_dist), + ], + ) + + def _get_dynamics(self) -> tuple[Callable, Callable]: + """Get affine Control Barrier Function (CBF) dynamics for a given environment. + + This method provides access to the system's drift and control dynamics, formulated for continuous systems of the form x' = f(x) + g(x)u, where 'x' is the state vector and 'u' is the control vector. 
+ + Returns: + get_f : Callable[[np.ndarray], np.ndarray] + Function to compute the drift dynamics 'f(x)' of the system. + + get_g : Callable[[np.ndarray], np.ndarray] + Function to compute the control dynamics 'g(x)' of the system. """ - def get_f(state): - f_x = np.zeros(state.shape) - return f_x + def get_f(state: np.ndarray) -> np.ndarray: + """Function to compute the drift dynamics 'f(x)' of the system.""" + return np.zeros(state.shape) - def get_g(state): + def get_g(state: np.ndarray) -> np.ndarray: + """Function to compute the control dynamics 'g(x)' of the system.""" theta = state[2] - g_x = np.array([[np.cos(theta), 0], - [np.sin(theta), 0], - [ 0, 1.0]]) - return g_x + return np.array([[np.cos(theta), 0], [np.sin(theta), 0], [0, 1.0]]) return get_f, get_g - def obs_compass(self): + def obs_compass(self) -> np.ndarray: """ Return a robot-centric compass observation of a list of positions. Compass is a normalized (unit-lenght) egocentric XY vector, @@ -297,33 +333,36 @@ def obs_compass(self): # Get ego vector in world frame vec = self.goal_pos - self.state[:2] # Rotate into frame - R = np.array([[np.cos(self.state[2]), -np.sin(self.state[2])], [np.sin(self.state[2]), np.cos(self.state[2])]]) + R = np.array( + [ + [np.cos(self.state[2]), -np.sin(self.state[2])], + [np.sin(self.state[2]), np.cos(self.state[2])], + ], + ) vec = np.matmul(vec, R) # Normalize vec /= np.sqrt(np.sum(np.square(vec))) + 0.001 return vec - def _goal_dist(self): + def _goal_dist(self) -> np.ndarray: return np.linalg.norm(self.goal_pos - self.state[:2]) - def close(self): + def close(self) -> None: if self.viewer: self.viewer.close() self.viewer = None - def get_random_hazard_locations(self, n_hazards: int, hazard_radius: float): + def get_random_hazard_locations(self, n_hazards: int, hazard_radius: float) -> None: """ - Parameters - ---------- + Parameters:--- n_hazards : int Number of hazards to create hazard_radius : float Radius of hazards - Returns - ------- - hazards_locs : 
ndarray + Returns: + hazards_locs : np.ndarray Numpy array of shape (n_hazards, 2) containing xy locations of hazards. """ @@ -335,28 +374,64 @@ def get_random_hazard_locations(self, n_hazards: int, hazard_radius: float): hazards = [] hazards_centers = np.zeros((n_hazards, 2)) n = 0 # Number of hazards actually placed - for i in range(n_hazards): + for _ in range(n_hazards): successfully_placed = False - iter = 0 + iteration = 0 hazard_type = np.random.randint(3) # 0-> Circle 1->Square 2->Triangle - radius = hazard_radius * (1-0.2*2.0*(np.random.random() - 0.5)) - while not successfully_placed and iter < 100: - hazards_centers[n] = (buffered_bds[1] - buffered_bds[0]) * np.random.random(2) + buffered_bds[0] - successfully_placed = np.all(np.linalg.norm(hazards_centers[:n] - hazards_centers[[n]], axis=1) > 3.5*hazard_radius) - successfully_placed = np.logical_and(successfully_placed, np.linalg.norm(self.goal_pos - hazards_centers[n]) > 2.0*hazard_radius) - successfully_placed = np.logical_and(successfully_placed, np.all(np.linalg.norm(self.initial_state[:, :2] - hazards_centers[[n]], axis=1) > 2.0*hazard_radius)) - iter += 1 + radius = hazard_radius * (1 - 0.2 * 2.0 * (np.random.random() - 0.5)) + while not successfully_placed and iteration < 100: + hazards_centers[n] = (buffered_bds[1] - buffered_bds[0]) * np.random.random( + 2, + ) + buffered_bds[0] + successfully_placed = np.all( + np.linalg.norm(hazards_centers[:n] - hazards_centers[[n]], axis=1) + > 3.5 * hazard_radius, + ) + successfully_placed = np.logical_and( + successfully_placed, + np.linalg.norm(self.goal_pos - hazards_centers[n]) > 2.0 * hazard_radius, + ) + successfully_placed = np.logical_and( + successfully_placed, + np.all( + np.linalg.norm(self.initial_state[:, :2] - hazards_centers[[n]], axis=1) + > 2.0 * hazard_radius, + ), + ) + iteration += 1 if not successfully_placed: continue if hazard_type == 0: # Circle hazards.append({'type': 'circle', 'location': hazards_centers[n], 'radius': radius}) 
elif hazard_type == 1: # Square - hazards.append({'type': 'polygon', 'vertices': np.array( - [[-radius, -radius], [-radius, radius], [radius, radius], [radius, -radius]])}) + hazards.append( + { + 'type': 'polygon', + 'vertices': np.array( + [ + [-radius, -radius], + [-radius, radius], + [radius, radius], + [radius, -radius], + ], + ), + }, + ) hazards[-1]['vertices'] += hazards_centers[n] else: # Triangle - hazards.append({'type': 'polygon', 'vertices': np.array( - [[-radius, -radius], [-radius, radius], [radius, radius], [radius, -radius]])}) + hazards.append( + { + 'type': 'polygon', + 'vertices': np.array( + [ + [-radius, -radius], + [-radius, radius], + [radius, radius], + [radius, -radius], + ], + ), + }, + ) # Pick a vertex and delete it idx = np.random.randint(4) hazards[-1]['vertices'] = np.delete(hazards[-1]['vertices'], idx, axis=0) diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py index 8732d6e34..2f17f852b 100644 --- a/omnisafe/evaluator.py +++ b/omnisafe/evaluator.py @@ -301,6 +301,39 @@ def __load_model_and_env( ) self._actor = actor_builder.build_actor(actor_type) self._actor.load_state_dict(model_params['pi']) + if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': + from omnisafe.common.barrier_comp import BarrierCompensator + + self.compensator = BarrierCompensator( + obs_dim=observation_space.shape[0], + act_dim=action_space.shape[0], + cfgs=self._cfgs['compensator_cfgs'], + ) + model_path = os.path.join(save_dir, 'torch_save', model_name) + try: + model_params = torch.load(model_path) + except FileNotFoundError as error: + raise FileNotFoundError( + 'The model is not found in the save directory.', + ) from error + self.compensator.load_state_dict(model_params['compensator']) + if self._cfgs['algo'] == 'SACRCBF': + from omnisafe.common.robust_barrier_solver import CBFQPLayer + from omnisafe.common.robust_gp_model import DynamicsModel + + epoch = model_name.split('.pt')[0].split('-')[-1] + self.solver = CBFQPLayer( + 
env=self._env, + device=self._cfgs['train_cfgs']['device'], + gamma_b=self._cfgs['cbf_cfgs']['gamma_b'], + k_d=self._cfgs['cbf_cfgs']['k_d'], + l_p=self._cfgs['cbf_cfgs']['l_p'], + ) + self.dynamics_model = DynamicsModel(env=self._env) + self.dynamics_model.load_disturbance_models( + save_dir=os.path.join(self._save_dir, 'gp_model_save'), + epoch=epoch, + ) if self._cfgs['algo'] in ['CRABS']: self._init_crabs(model_params) @@ -377,11 +410,23 @@ def load_saved( # load the config self._save_dir = save_dir self._model_name = model_name + epoch = model_name.split('.pt')[0].split('-')[-1] self.__load_cfgs(save_dir) self.__set_render_mode(render_mode) + if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': + from omnisafe.common.barrier_solver import PendulumSolver + + self.solver = PendulumSolver() + path = os.path.join( + save_dir, + 'gp_model_save', + f'gaussian_process_regressor_{epoch}.pkl', + ) + self.solver.build_gp_model(save_dir=path) + env_kwargs = { 'env_id': self._cfgs['env_id'], 'num_envs': 1, @@ -452,6 +497,32 @@ def evaluate( raise ValueError( 'The policy must be provided or created before evaluating the agent.', ) + if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': + approx_compensating_act = self.compensator(obs=obs) + compensated_act_mean_raw = act + approx_compensating_act + [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=False) + compensating_act = self.solver.control_barrier( + compensated_act_mean_raw, + f, + g, + x, + std, + ) + act = compensated_act_mean_raw + compensating_act + + if self._cfgs['algo'] == 'SACRCBF': + state_batch = self.dynamics_model.get_state(obs) + mean_pred_batch, sigma_pred_batch = self.dynamics_model.predict_disturbance( + state_batch, + ) + safe_act = self.solver.get_safe_action( + state_batch, + act, + mean_pred_batch, + sigma_pred_batch, + ) + act = safe_act + obs, rew, cost, terminated, truncated, _ = self._env.step(act) if 'Saute' in self._cfgs['algo'] or 'Simmer' 
in self._cfgs['algo']: self._safety_obs -= cost.unsqueeze(-1) / self._safety_budget diff --git a/omnisafe/models/actor/actor_builder.py b/omnisafe/models/actor/actor_builder.py index 80c68e1be..75358134c 100644 --- a/omnisafe/models/actor/actor_builder.py +++ b/omnisafe/models/actor/actor_builder.py @@ -16,12 +16,12 @@ from __future__ import annotations +from omnisafe.models.actor.beta_learning_actor import BetaLearningActor from omnisafe.models.actor.gaussian_learning_actor import GaussianLearningActor from omnisafe.models.actor.gaussian_sac_actor import GaussianSACActor from omnisafe.models.actor.mlp_actor import MLPActor from omnisafe.models.actor.perturbation_actor import PerturbationActor from omnisafe.models.actor.vae_actor import VAE -from omnisafe.models.actor.beta_learning_actor import BetaLearningActor from omnisafe.models.base import Actor from omnisafe.typing import Activation, ActorType, InitFunction, OmnisafeSpace diff --git a/omnisafe/models/actor/beta_learning_actor.py b/omnisafe/models/actor/beta_learning_actor.py index 8f9675934..e0ee6b3e9 100644 --- a/omnisafe/models/actor/beta_learning_actor.py +++ b/omnisafe/models/actor/beta_learning_actor.py @@ -18,22 +18,19 @@ import torch import torch.nn as nn -import numpy as np +from torch.distributions import Beta, Distribution -from torch.distributions import Distribution, Beta - -from omnisafe.models.actor.gaussian_actor import GaussianActor +from omnisafe.models.base import Actor from omnisafe.typing import Activation, InitFunction, OmnisafeSpace from omnisafe.utils.model import build_mlp_network -from omnisafe.models.base import Actor # pylint: disable-next=too-many-instance-attributes class BetaLearningActor(Actor): - + """Initialize an instance of :class:`BetaLearningActor`.""" _current_dist: Beta - + def __init__( self, obs_space: OmnisafeSpace, @@ -42,30 +39,30 @@ def __init__( activation: Activation = 'relu', weight_initialization_mode: InitFunction = 'kaiming_uniform', ) -> None: - 
"""Initialize an instance of :class:`GaussianLearningActor`.""" + """Initialize an instance of :class:`BetaLearningActor`.""" super().__init__(obs_space, act_space, hidden_sizes, activation, weight_initialization_mode) - + self.mean: nn.Module = build_mlp_network( sizes=[self._obs_dim, self._hidden_sizes[0], self._hidden_sizes[0]], activation=activation, output_activation='tanh', weight_initialization_mode=weight_initialization_mode, ) - + self.alpha_net: nn.Module = build_mlp_network( sizes=[self._hidden_sizes[-1], self._act_dim], activation='identity', output_activation='softplus', weight_initialization_mode=weight_initialization_mode, ) - + self.beta_net: nn.Module = build_mlp_network( sizes=[self._hidden_sizes[-1], self._act_dim], activation='identity', output_activation='softplus', weight_initialization_mode=weight_initialization_mode, ) - + def _distribution(self, obs: torch.Tensor) -> Beta: """Get the distribution of the actor. @@ -80,8 +77,8 @@ def _distribution(self, obs: torch.Tensor) -> Beta: The normal distribution of the mean and standard deviation from the actor. 
""" mean = self.mean(obs) - alphas = 1.0+self.alpha_net(mean) - betas = 1.0+self.beta_net(mean) + alphas = 1.0 + self.alpha_net(mean) + betas = 1.0 + self.beta_net(mean) return Beta(alphas, betas) def predict(self, obs: torch.Tensor, deterministic: bool = False) -> torch.Tensor: diff --git a/pyproject.toml b/pyproject.toml index a74b46723..350414746 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,11 @@ dependencies = [ "matplotlib >= 3.7.1", "gdown >= 4.6.0", "pytorch_lightning >= 2.2.2", + "cvxopt== 1.3.2", + "gpytorch== 1.11", + "joblib == 1.3.2", + "qpth == 0.0.16", + "scikit_learn == 1.3.2" ] dynamic = ["version", "entry-points"] diff --git a/requirements.txt b/requirements.txt index 0abf5e41a..03fec36c3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,8 @@ seaborn >= 0.12.2 pandas >= 1.5.3 matplotlib >= 3.7.1 gdown >= 4.6.0 +cvxopt==1.3.2 +gpytorch==1.11 +joblib==1.3.2 +qpth==0.0.16 +scikit_learn==1.3.2 From 71ffe782ccb28768f3aeba93e7ed9c5a6bfcab29 Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Fri, 3 May 2024 22:06:04 +0800 Subject: [PATCH 03/18] wip --- .pre-commit-config.yaml | 4 +- docs/source/spelling_wordlist.txt | 27 +++ omnisafe/adapter/barrier_function_adapter.py | 43 +++-- .../adapter/beta_barrier_function_adapter.py | 61 +++--- .../offpolicy_barrier_function_adapter.py | 84 +++++--- .../robust_barrier_function_adapter.py | 32 +++- omnisafe/algorithms/off_policy/ddpg.py | 2 +- omnisafe/algorithms/off_policy/ddpg_cbf.py | 26 +-- omnisafe/algorithms/off_policy/sac_rcbf.py | 6 +- .../on_policy/barrier_function/ppo_cbf.py | 10 +- .../on_policy/barrier_function/trpo_cbf.py | 36 +++- .../on_policy/base/policy_gradient.py | 22 ++- omnisafe/common/barrier_comp.py | 5 +- omnisafe/common/barrier_solver.py | 37 ++-- omnisafe/common/robust_barrier_solver.py | 126 +++++------- omnisafe/common/robust_gp_model.py | 86 +++++---- omnisafe/common/utils.py | 55 ------ omnisafe/configs/on-policy/TRPOCBF.yaml | 4 +- 
omnisafe/envs/barrier_function_env.py | 24 +-- omnisafe/envs/robust_barrier_function_env.py | 19 +- omnisafe/envs/unicycle_env.py | 180 +----------------- omnisafe/evaluator.py | 47 +++-- omnisafe/utils/tools.py | 37 ++++ 23 files changed, 446 insertions(+), 527 deletions(-) delete mode 100644 omnisafe/common/utils.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 99b01f43f..42e2956f9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -117,5 +117,7 @@ repos: ^omnisafe/common/control_barrier_function/crabs/models.py$| ^omnisafe/common/control_barrier_function/crabs/optimizers.py$| ^omnisafe/common/control_barrier_function/crabs/utils.py$| - ^conftest.py$ + ^conftest.py$| + ^omnisafe/envs/unicycle_env.py| + ^setup.py$ ) diff --git a/docs/source/spelling_wordlist.txt b/docs/source/spelling_wordlist.txt index 460cabd1a..46e297388 100644 --- a/docs/source/spelling_wordlist.txt +++ b/docs/source/spelling_wordlist.txt @@ -486,3 +486,30 @@ UpdateDynamics mathbb meger Jupyter +compensator +CBF +Vectorize +gp +optim +cvx +QP +gpytorch +ExactGP +RBF +parallelization +compensators +thetadot +VK +Sharma +Kosaraju +Seetharaman +Sadler +Suttle +Cheng +Orosz +JW +Burdick +Vipul +Sivaranjani +Vijay +suttle diff --git a/omnisafe/adapter/barrier_function_adapter.py b/omnisafe/adapter/barrier_function_adapter.py index 735ff690e..a91218b48 100644 --- a/omnisafe/adapter/barrier_function_adapter.py +++ b/omnisafe/adapter/barrier_function_adapter.py @@ -18,6 +18,7 @@ import torch from rich.progress import track +from sklearn.gaussian_process import GaussianProcessRegressor from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter from omnisafe.common.barrier_comp import BarrierCompensator @@ -46,8 +47,8 @@ class BarrierFunctionAdapter(OnPolicyAdapter): def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: """Initialize an instance of :class:`BarrierFunctionAdapter`.""" super().__init__(env_id, num_envs, seed, cfgs) - 
self.solver = None - self.compensator = None + self.solver: PendulumSolver + self.compensator: BarrierCompensator self.first_iter = 1 def _wrapper( @@ -85,16 +86,15 @@ def _wrapper( def set_solver(self, solver: PendulumSolver) -> None: """Set the barrier function solver for Pendulum environment.""" - self.solver: PendulumSolver = solver + self.solver = solver def set_compensator(self, compensator: BarrierCompensator) -> None: """Set the action compensator.""" - self.compensator: BarrierCompensator = compensator + self.compensator = compensator def reset_gp_model(self) -> None: """Reset the gaussian processing model of barrier function solver.""" - self.solver.GP_model_prev = self.solver.GP_model.copy() - self.solver.build_GP_model() + self.solver.reset_gp_model() def rollout( # pylint: disable=too-many-locals self, @@ -103,7 +103,7 @@ def rollout( # pylint: disable=too-many-locals buffer: VectorOnPolicyBuffer, logger: Logger, ) -> None: - """Rollout the environment and store the data in the buffer. + """Rollout the environment with barrier function controller. Args: steps_per_epoch (int): Number of steps per epoch. 
@@ -117,8 +117,6 @@ def rollout( # pylint: disable=too-many-locals self.reset_gp_model() obs, _ = self.reset() - while abs(self._env.unwrapped.state[0]) > 1: - obs, _ = self._env.reset() path_obs = [] path_act = [] for step in track( @@ -135,9 +133,9 @@ def rollout( # pylint: disable=too-many-locals compensated_act_mean_raw = act_mean + approx_compensating_act if self.first_iter: - [f, g, x, std] = self.solver.get_GP_dynamics(obs, use_prev_model=False) + [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=False) else: - [f, g, x, std] = self.solver.get_GP_dynamics(obs, use_prev_model=True) + [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=True) compensating_act = self.solver.control_barrier( compensated_act_mean_raw, @@ -150,16 +148,15 @@ def rollout( # pylint: disable=too-many-locals compensated_act_mean = compensated_act_mean_raw + compensating_act final_act = torch.normal(compensated_act_mean, act_std) - logp = agent.actor.log_prob(final_act).detach() - path_obs.append(obs.detach().cpu().squeeze().numpy()) - path_act.append(final_act.detach().cpu().squeeze().numpy()) + logp = agent.actor.log_prob(final_act) + + path_obs.append(obs) + path_act.append(final_act) next_obs, reward, cost, terminated, truncated, info = self.step(final_act) self._log_value(reward=reward, cost=cost, info=info) - if self._cfgs.algo_cfgs.use_cost: - logger.store({'Value/cost': value_c}) logger.store({'Value/reward': value_r}) logger.store({'Metrics/angle': cost}) @@ -202,13 +199,19 @@ def rollout( # pylint: disable=too-many-locals self._ep_cost[idx] = 0.0 self._ep_len[idx] = 0.0 - if step < 650: - self.solver.update_GP_dynamics(obs=path_obs, act=path_act) + if step < self._cfgs.algo_cfgs.update_dynamics_steps: + self.solver.update_gp_dynamics( + obs=torch.cat(path_obs), # type: ignore + act=torch.cat(path_act), # type: ignore + ) path_obs = [] path_act = [] obs, _ = self.reset() - while abs(self._env.unwrapped.state[0]) > 1: - obs, _ = self._env.reset() 
buffer.finish_path(last_value_r, last_value_c, idx) self.first_iter = 0 + + @property + def gp_models(self) -> list[GaussianProcessRegressor]: + """Return the gp models to be saved.""" + return self.solver.gp_models diff --git a/omnisafe/adapter/beta_barrier_function_adapter.py b/omnisafe/adapter/beta_barrier_function_adapter.py index ee8ccc298..844c0b4ce 100644 --- a/omnisafe/adapter/beta_barrier_function_adapter.py +++ b/omnisafe/adapter/beta_barrier_function_adapter.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""BarrierFunction Adapter for OmniSafe.""" +"""Barrier Function Adapter for OmniSafe.""" from __future__ import annotations +from typing import Callable + import numpy as np import torch from rich.progress import track @@ -28,17 +30,28 @@ from omnisafe.utils.config import Config -def cbf(state: np.ndarray | None = None, eta: float = 0.99) -> tuple[np.ndarray, np.ndarray]: - """ - Calculates CBF constraint set at a given state. Default is - the current state. +# pylint: disable-next=too-many-locals +def cbf(state: np.ndarray, eta: float = 0.99) -> tuple[np.ndarray, np.ndarray]: + """Calculates the Control Barrier Function (CBF) constraints. + + Args: + state (np.ndarray): A numpy array containing the pendulum's current angular position + (theta) and angular velocity (thetadot). + eta (float): A scaling factor used to adjust the safety bounds. + + Returns: + tuple containing two elements: 1. The minimum control torque that keeps the pendulum within + the safety bounds. 2. The maximum control torque that keeps the pendulum within the safety + bounds. + + Raises: + ValueError: If the `eta` value is not within the open interval (0, 1).
""" g = 9.8 m = 1 length = 1 tau = 5e-2 theta_safety_bounds = [-1.0, 1.0] - thetadot_safety_bounds = [-np.inf, np.inf] torque_bounds = [-15.0, 15.0] if (eta > 1 - 1e-3) or (eta < 1e-5): raise ValueError('eta should be inside (0, 1)') @@ -47,7 +60,7 @@ def cbf(state: np.ndarray | None = None, eta: float = 0.99) -> tuple[np.ndarray, theta, thetadot = state[0], state[1] theta_min, theta_max = theta_safety_bounds[0], theta_safety_bounds[1] - thetadot_min, thetadot_max = thetadot_safety_bounds[0], thetadot_safety_bounds[1] + thetadot_min, thetadot_max = -np.inf, np.inf u_min1 = (1 / c2) * ( ((1 / (tau**2)) * (-eta * (theta - theta_min) - tau * thetadot)) - c1 * np.sin(theta) ) @@ -61,27 +74,21 @@ def cbf(state: np.ndarray | None = None, eta: float = 0.99) -> tuple[np.ndarray, u_min = max(u_min1, u_min2, torque_bounds[0]) u_max = min(u_max1, u_max2, torque_bounds[1]) - u_min = torque_bounds[0] - u_max = torque_bounds[1] + return (u_min, u_max) - return [u_min, u_max] - -def vectorize_f(f: callable) -> callable: - """Converts a function `f` that operates on 1D numpy arrays and outputs pairs of scalars, - into a vectorized function that accepts batches of torch tensorized arrays and outputs - pairs of torch tensors. +def vectorize_f(f: Callable) -> Callable: + """Vectorize the function. Args: - f (callable): A function that accepts 1D numpy arrays and returns a tuple (lower_bound, upper_bound), where both are scalars. + f (callable): A function that accepts 1D numpy arrays and returns a tuple (lower_bound, upper_bound). Returns: callable: A vectorized function that can process batches of torch tensors and return pairs of torch tensors. """ def vectorized_f_(obs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: - """ - Inner function to process the torch tensor batch. + """Inner function to process the torch tensor batch. Args: obs (torch.Tensor): A batch of observations as torch tensors. 
@@ -94,13 +101,13 @@ def vectorized_f_(obs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: if len(obs.shape) == 1: batch_size = 1 lbs, ubs = f(obs) - lbs = np.array(lbs) - ubs = np.array(ubs) + lbs = torch.as_tensor(lbs) + ubs = torch.as_tensor(ubs) else: batch_size = obs.shape[0] - lbs = np.zeros([batch_size, 1]) - ubs = np.zeros([batch_size, 1]) + lbs = torch.zeros([batch_size, 1]) + ubs = torch.zeros([batch_size, 1]) for i in range(batch_size): lbs[i], ubs[i] = f(obs[i]) @@ -129,10 +136,7 @@ class BetaBarrierFunctionAdapter(OnPolicyAdapter): def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: """Initialize an instance of :class:`BarrierFunctionAdapter`.""" super().__init__(env_id, num_envs, seed, cfgs) - self.solver = None - self.compensator = None - self.first_iter = 1 - self.constraint_fn = vectorize_f(cbf) + self.constraint_fn: Callable = vectorize_f(cbf) def _wrapper( self, @@ -183,8 +187,6 @@ def rollout( # pylint: disable=too-many-locals """ self._reset_log() obs, _ = self.reset() - while abs(self._env.unwrapped.state[0]) > 1: - obs, _ = self._env.reset() for step in track( range(steps_per_epoch), description=f'Processing rollout for epoch: {logger.current_epoch}...', @@ -240,7 +242,4 @@ def rollout( # pylint: disable=too-many-locals self._ep_cost[idx] = 0.0 self._ep_len[idx] = 0.0 obs, _ = self.reset() - while abs(self._env.unwrapped.state[0]) > 1: - obs, _ = self._env.reset() buffer.finish_path(last_value_r, last_value_c, idx) - self.first_iter = 0 diff --git a/omnisafe/adapter/offpolicy_barrier_function_adapter.py b/omnisafe/adapter/offpolicy_barrier_function_adapter.py index e1353884b..49bf7909c 100644 --- a/omnisafe/adapter/offpolicy_barrier_function_adapter.py +++ b/omnisafe/adapter/offpolicy_barrier_function_adapter.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""BarrierFunction Adapter for OmniSafe.""" +"""BarrierFunction OffPolicy Adapter for OmniSafe.""" from __future__ import annotations +from typing import Any + import torch from sklearn.gaussian_process import GaussianProcessRegressor @@ -30,14 +32,24 @@ class OffPolicyBarrierFunctionAdapter(OffPolicyAdapter): + """OffPolicy Barrier Function Adapter for OmniSafe. + + :class:`OffPolicyBarrierFunctionAdapter` is used to adapt the environment with CBF controller. + + Args: + env_id (str): The environment id. + num_envs (int): The number of environments. + seed (int): The random seed. + cfgs (Config): The configuration. + """ def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: """Initialize an instance of :class:`BarrierFunctionAdapter`.""" super().__init__(env_id, num_envs, seed, cfgs) - self.solver = None - self.compensator = None - self.first_iter = 1 - self.episode_rollout = {} + self.solver: PendulumSolver + self.compensator: BarrierCompensator + self.first_iter: int = 1 + self.episode_rollout: dict[str, Any] = {} self.episode_rollout['obs'] = [] self.episode_rollout['final_act'] = [] self.episode_rollout['approx_compensating_act'] = [] @@ -100,16 +112,15 @@ def eval_policy( # pylint: disable=too-many-locals def set_solver(self, solver: PendulumSolver) -> None: """Set the barrier function solver for Pendulum environment.""" - self.solver: PendulumSolver = solver + self.solver = solver def set_compensator(self, compensator: BarrierCompensator) -> None: """Set the action compensator.""" - self.compensator: BarrierCompensator = compensator + self.compensator = compensator def reset_gp_model(self) -> None: """Reset the gaussian processing model of barrier function solver.""" - self.solver.gp_model_prev = self.solver.gp_model.copy() - self.solver.build_gp_model() + self.solver.reset_gp_model() def rollout( # pylint: disable=too-many-locals self, @@ -119,13 
+130,23 @@ def rollout( # pylint: disable=too-many-locals logger: Logger, use_rand_action: bool, ) -> None: + """Rollout in off-policy manner with barrier function controller. + + Args: + rollout_step (int): Number of rollout steps. + agent (ConstraintActorCritic): Constraint actor-critic, including actor, reward critic, + and cost critic. + buffer (VectorOnPolicyBuffer): Vector on-policy buffer. + logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``. + use_rand_action (bool): Whether to use random action. + """ for _ in range(rollout_step): if use_rand_action: - act = (torch.rand(self.action_space.shape) * 2 - 1).unsqueeze(0).to(self._device) + act = (torch.rand(self.action_space.shape) * 2 - 1).unsqueeze(0).to(self._device) # type: ignore else: act = agent.actor.predict(self._current_obs, deterministic=False) - final_act = self.get_safe_action(obs=self._current_obs, act=act) + final_act = self.get_safe_action(self._current_obs, act) self.episode_rollout['obs'].append(self._current_obs) self.episode_rollout['final_act'].append(final_act) @@ -146,15 +167,15 @@ def rollout( # pylint: disable=too-many-locals for idx, done in enumerate(torch.logical_or(terminated, truncated)): if done: self._log_metrics(logger, idx) - compensator_loss = self.compensator.train( + compensator_loss = self.compensator.update( torch.cat(self.episode_rollout['obs']), torch.cat(self.episode_rollout['approx_compensating_act']), torch.cat(self.episode_rollout['compensating_act']), ) logger.store({'Value/Loss_compensator': compensator_loss.item()}) self.solver.update_gp_dynamics( - obs=torch.cat(self.episode_rollout['obs']), - act=torch.cat(self.episode_rollout['final_act']), + obs=torch.cat(self.episode_rollout['obs']), # type: ignore + act=torch.cat(self.episode_rollout['final_act']), # type: ignore ) self.episode_rollout['obs'] = [] @@ -168,30 +189,41 @@ def rollout( # pylint: disable=too-many-locals if not self.first_iter: self.reset_gp_model() - @torch.no_grad def 
get_safe_action( self, obs: torch.Tensor, act: torch.Tensor, is_eval: bool = False, ) -> torch.Tensor: - approx_compensating_act = self.compensator(obs=self._current_obs) - compensated_act_mean_raw = act + approx_compensating_act + """Computes a safe action by applying compensatory actions. + + Args: + obs (torch.Tensor): The current observation from the environment. + act (torch.Tensor): The proposed action to be evaluated for safety. + is_eval (bool, optional): A flag to indicate whether this is an evaluation phase, defaulting to False. - if self.first_iter: - [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=False) - else: - [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=True) + Returns: + torch.Tensor: The safe action to be executed in the environment. + """ + with torch.no_grad(): + approx_compensating_act = self.compensator(obs=self._current_obs) + compensated_act_mean_raw = act + approx_compensating_act + + if self.first_iter: + [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=False) + else: + [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=True) - compensating_act = self.solver.control_barrier(compensated_act_mean_raw, f, g, x, std) - safe_act = compensated_act_mean_raw + compensating_act + compensating_act = self.solver.control_barrier(compensated_act_mean_raw, f, g, x, std) + safe_act = compensated_act_mean_raw + compensating_act - if not is_eval: - self.episode_rollout['compensating_act'].append(compensating_act) - self.episode_rollout['approx_compensating_act'].append(approx_compensating_act) + if not is_eval: + self.episode_rollout['compensating_act'].append(compensating_act) + self.episode_rollout['approx_compensating_act'].append(approx_compensating_act) return safe_act @property def gp_models(self) -> list[GaussianProcessRegressor]: + """Return the gp models to be saved.""" return self.solver.gp_models diff --git a/omnisafe/adapter/robust_barrier_function_adapter.py 
b/omnisafe/adapter/robust_barrier_function_adapter.py index 843676c7f..f56674319 100644 --- a/omnisafe/adapter/robust_barrier_function_adapter.py +++ b/omnisafe/adapter/robust_barrier_function_adapter.py @@ -32,12 +32,22 @@ class RobustBarrierFunctionAdapter(OffPolicyAdapter): + """Off Policy Robust Barrier Function Adapter for OmniSafe. + + :class:`RobustBarrierFunctionAdapter` is used to adapt the environment with RCBF controller. + + Args: + env_id (str): The environment id. + num_envs (int): The number of environments. + seed (int): The random seed. + cfgs (Config): The configuration. + """ def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: """Initialize an instance of :class:`BarrierFunctionAdapter`.""" super().__init__(env_id, num_envs, seed, cfgs) - self.solver = None - self.compensator = None + self.solver: CBFQPLayer + self.dynamics_model: DynamicsModel self._current_steps = 0 self._num_episodes = 0 @@ -70,13 +80,13 @@ def _wrapper( def set_solver(self, solver: CBFQPLayer) -> None: """Set the barrier function solver for Pendulum environment.""" - self.solver: CBFQPLayer = solver - self.solver.env = self._env + self.solver = solver + self.solver.env = self._env # type: ignore def set_dynamics_model(self, dynamics_model: DynamicsModel) -> None: """Set the dynamics model.""" self.dynamics_model = dynamics_model - self.dynamics_model.env = self._env + self.dynamics_model.env = self._env # type: ignore def eval_policy( # pylint: disable=too-many-locals self, @@ -143,7 +153,7 @@ def rollout( # pylint: disable=too-many-locals state = self.dynamics_model.get_state(self._current_obs) self._current_steps += 1 if use_rand_action: - act = (torch.rand(self.action_space.shape) * 2 - 1).unsqueeze(0).to(self._device) + act = (torch.rand(self.action_space.shape) * 2 - 1).unsqueeze(0).to(self._device) # type: ignore else: act = agent.step(self._current_obs, deterministic=False) @@ -182,12 +192,21 @@ def rollout( # pylint: 
disable=too-many-locals @property def safe_action_space(self) -> OmnisafeSpace: + """Return the action space in the safe domain.""" if hasattr(self._env, 'safe_action_space'): return self._env.safe_action_space return self._env.action_space def get_safe_action(self, obs: torch.Tensor, act: torch.Tensor) -> torch.Tensor: + """Computes a safe action by applying robust barrier function. + Args: + obs (torch.Tensor): The current observation from the environment. + act (torch.Tensor): The proposed action to be evaluated for safety. + + Returns: + torch.Tensor: The safe action to be executed in the environment. + """ state_batch = self.dynamics_model.get_state(obs) mean_pred_batch, sigma_pred_batch = self.dynamics_model.predict_disturbance(state_batch) @@ -199,4 +218,5 @@ def get_safe_action(self, obs: torch.Tensor, act: torch.Tensor) -> torch.Tensor: ) def __getattr__(self, name: str) -> Any: + """Return the unwrapped environment attributes.""" return getattr(self._env, name) diff --git a/omnisafe/algorithms/off_policy/ddpg.py b/omnisafe/algorithms/off_policy/ddpg.py index f0c633220..2d6bad948 100644 --- a/omnisafe/algorithms/off_policy/ddpg.py +++ b/omnisafe/algorithms/off_policy/ddpg.py @@ -558,7 +558,7 @@ def _log_when_not_update(self) -> None: }, ) - def _log_what_to_save(self) -> dict[str, Any]: + def _log_what_to_save(self) -> None: """Define what need to be saved below.""" what_to_save: dict[str, Any] = {} diff --git a/omnisafe/algorithms/off_policy/ddpg_cbf.py b/omnisafe/algorithms/off_policy/ddpg_cbf.py index ad1306d5b..32b27be1d 100644 --- a/omnisafe/algorithms/off_policy/ddpg_cbf.py +++ b/omnisafe/algorithms/off_policy/ddpg_cbf.py @@ -13,6 +13,7 @@ # limitations under the License. 
# ============================================================================== """Implementation of the DDPG algorithm with Control Barrier Function.""" +# mypy: ignore-errors from __future__ import annotations @@ -34,12 +35,13 @@ @registry.register # pylint: disable-next=too-many-instance-attributes, too-few-public-methods class DDPGCBF(DDPG): - """The Soft Actor-Critic algorithm with Control Barrier Function. + """The DDPG algorithm with CBF. References: - - Title: Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor - - Authors: Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, Sergey Levine. - - URL: `DDPG `_ + - Title: End-to-end safe reinforcement learning through barrier functions for + safety-critical continuous control tasks + - Authors: R Cheng, G Orosz, RM Murray, JW Burdick. + - URL: `DDPGCBF `_ """ def _init_env(self) -> None: @@ -95,14 +97,14 @@ def _init(self) -> None: ) def _init_log(self) -> None: - # """Log the DDPGRCBF specific information. - - # +----------------------------+--------------------------+ - # | Things to log | Description | - # +============================+==========================+ - # | Metrics/LagrangeMultiplier | The Lagrange multiplier. | - # +----------------------------+--------------------------+ - # """ + """Log the DDPGCBF specific information. + + +----------------------------+---------------------------------+ + | Things to log | Description | + +============================+=================================+ + | Value/Loss_compensator | The Loss of action compensator. 
| + +----------------------------+---------------------------------+ + """ super()._init_log() self._logger.register_key('Value/Loss_compensator') diff --git a/omnisafe/algorithms/off_policy/sac_rcbf.py b/omnisafe/algorithms/off_policy/sac_rcbf.py index 9fbd20a39..1e9547369 100644 --- a/omnisafe/algorithms/off_policy/sac_rcbf.py +++ b/omnisafe/algorithms/off_policy/sac_rcbf.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== """Implementation of the Soft Actor-Critic algorithm with Robust Control Barrier Function.""" - +# mypy: ignore-errors from __future__ import annotations @@ -175,8 +175,8 @@ def _specific_save(self) -> None: train_y = self._env.dynamics_model.train_y disturb_estimators = self._env.dynamics_model.disturb_estimators weights = [] - for i in range(len(disturb_estimators)): - weights.append(disturb_estimators[i].model.state_dict()) + for disturb_estimator in disturb_estimators: + weights.append(disturb_estimator.model.state_dict()) torch.save(weights, os.path.join(path, f'gp_models_{self._logger.current_epoch}.pkl')) torch.save( train_x, diff --git a/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py b/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py index 24b27d939..b77c36c76 100644 --- a/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py +++ b/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Implementation of the PPO algorithm with Control Barrier Function.""" +"""Implementation of the PPO algorithm with Control Barrier Function and Beta Actor.""" from __future__ import annotations @@ -26,6 +26,14 @@ @registry.register class PPOBetaCBF(PPO): + """The PPO algorithm with CBF and Beta Actor. 
+ + References: + - Title: Sampling-based Safe Reinforcement Learning for Nonlinear Dynamical Systems + - Authors: Wesley A. Suttle, Vipul K. Sharma, Krishna C. Kosaraju, S. Sivaranjani, Ji Liu, + Vijay Gupta, Brian M. Sadler. + - URL: `PPOBetaCBF `_ + """ def _init_log(self) -> None: super()._init_log() diff --git a/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py b/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py index 3fceec4f7..72238e41a 100644 --- a/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py +++ b/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py @@ -13,9 +13,13 @@ # limitations under the License. # ============================================================================== """Implementation of the TRPO algorithm with Control Barrier Function.""" +# mypy: ignore-errors from __future__ import annotations +import os + +import joblib import torch from torch.utils.data import DataLoader, TensorDataset @@ -25,14 +29,30 @@ from omnisafe.common.barrier_comp import BarrierCompensator from omnisafe.common.barrier_solver import PendulumSolver from omnisafe.utils import distributed +from omnisafe.utils.distributed import get_rank @registry.register class TRPOCBF(TRPO): + """The TRPO algorithm with CBF. + + References: + - Title: End-to-end safe reinforcement learning through barrier functions for + safety-critical continuous control tasks + - Authors: R Cheng, G Orosz, RM Murray, JW Burdick. + - URL: `TRPOCBF `_ + """ def _init_log(self) -> None: + """Log the TRPOCBF specific information. + + +----------------------------+---------------------------------+ + | Things to log | Description | + +============================+=================================+ + | Value/Loss_compensator | The Loss of action compensator. 
| + +----------------------------+---------------------------------+ + """ super()._init_log() - self._logger.register_key('Metrics/angle', min_and_max=True) self._logger.register_key('Value/Loss_compensator') def _init_env(self) -> None: @@ -110,7 +130,7 @@ def _update(self) -> None: ) self._update_actor(obs, act, logp, adv_r, adv_c) - compensator_loss = self._env.compensator.train( + compensator_loss = self._env.compensator.update( observation=obs, approx_compensating_act=approx_compensating_act, compensating_act=compensating_act, @@ -138,3 +158,15 @@ def _update(self) -> None: 'Value/Loss_compensator': compensator_loss.item(), }, ) + + def _specific_save(self) -> None: + """Save some algorithms specific models per epoch.""" + super()._specific_save() + if get_rank() == 0: + path = os.path.join( + self._logger.log_dir, + 'gp_model_save', + f'gaussian_process_regressor_{self._logger.current_epoch}.pkl', + ) + os.makedirs(os.path.dirname(path), exist_ok=True) + joblib.dump(self._env.gp_models, path) diff --git a/omnisafe/algorithms/on_policy/base/policy_gradient.py b/omnisafe/algorithms/on_policy/base/policy_gradient.py index e0792d6ab..831076de6 100644 --- a/omnisafe/algorithms/on_policy/base/policy_gradient.py +++ b/omnisafe/algorithms/on_policy/base/policy_gradient.py @@ -180,12 +180,7 @@ def _init_log(self) -> None: config=self._cfgs, ) - what_to_save: dict[str, Any] = {} - what_to_save['pi'] = self._actor_critic.actor - if self._cfgs.algo_cfgs.obs_normalize: - obs_normalizer = self._env.save()['obs_normalizer'] - what_to_save['obs_normalizer'] = obs_normalizer - self._logger.setup_torch_saver(what_to_save) + self._log_what_to_save() self._logger.torch_save() self._logger.register_key( @@ -296,6 +291,7 @@ def learn(self) -> tuple[float, float, float]: epoch + 1 ) == self._cfgs.train_cfgs.epochs: self._logger.torch_save() + self._specific_save() ep_ret = self._logger.get_stats('Metrics/EpRet')[0] ep_cost = self._logger.get_stats('Metrics/EpCost')[0] @@ -586,3 
+582,17 @@ def _loss_pi( }, ) return loss + + def _log_what_to_save(self) -> None: + """Define what need to be saved below.""" + what_to_save: dict[str, Any] = {} + + what_to_save['pi'] = self._actor_critic.actor + if self._cfgs.algo_cfgs.obs_normalize: + obs_normalizer = self._env.save()['obs_normalizer'] + what_to_save['obs_normalizer'] = obs_normalizer + + self._logger.setup_torch_saver(what_to_save) + + def _specific_save(self) -> None: + """Save some algorithms specific models per epoch.""" diff --git a/omnisafe/common/barrier_comp.py b/omnisafe/common/barrier_comp.py index 1a27d5863..40381ccd3 100644 --- a/omnisafe/common/barrier_comp.py +++ b/omnisafe/common/barrier_comp.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +"""Implementation of Compensator Used in Control Barrier Function.""" + from __future__ import annotations @@ -43,6 +45,7 @@ class BarrierCompensator(torch.nn.Module): """ def __init__(self, obs_dim: int, act_dim: int, cfgs: Config) -> None: + """Initialize the action compensator.""" super().__init__() self._cfgs: Config = cfgs self.model: torch.nn.Module = build_mlp_network( @@ -63,7 +66,7 @@ def forward(self, obs: torch.Tensor) -> torch.Tensor: """ return self.model(obs) - def train( + def update( self, observation: torch.Tensor, approx_compensating_act: torch.Tensor, diff --git a/omnisafe/common/barrier_solver.py b/omnisafe/common/barrier_solver.py index b00af906e..ea287b4ad 100644 --- a/omnisafe/common/barrier_solver.py +++ b/omnisafe/common/barrier_solver.py @@ -14,6 +14,10 @@ # ============================================================================== """Implementation of the Control Barrier Function Solver.""" +# pylint: disable=invalid-name,wrong-spelling-in-docstring +# mypy: ignore-errors + + from __future__ import annotations import warnings @@ -27,6 +31,7 @@ from 
sklearn.gaussian_process.kernels import ConstantKernel as C +# pylint: disable-next=too-many-instance-attributes class PendulumSolver: """Solver for the pendulum problem using Gaussian Process models. @@ -38,6 +43,7 @@ class PendulumSolver: device (str): Device to run the computations on. """ + # pylint: disable-next=invalid-name def __init__( self, action_size: int = 1, @@ -63,9 +69,11 @@ def __init__( self._device = device self._gamma_b = 0.5 self._kd = 1.5 + self.gp_model_prev: list[GaussianProcessRegressor, GaussianProcessRegressor] + self.gp_model: list[GaussianProcessRegressor, GaussianProcessRegressor] + self._build_barrier() self.build_gp_model() - self.gp_model_prev = None warnings.filterwarnings('ignore') def build_gp_model(self, save_dir: str | None = None) -> None: @@ -80,6 +88,7 @@ def build_gp_model(self, save_dir: str | None = None) -> None: else: gp_list = joblib.load(save_dir) self.gp_model = gp_list + self.gp_model_prev = gp_list.copy() @property def gp_models(self) -> list[GaussianProcessRegressor]: @@ -95,7 +104,7 @@ def _build_barrier(self) -> None: self.h3 = np.array([-1, 0.01]) self.h4 = np.array([-1, -0.01]) - def control_barrier( + def control_barrier( # pylint: disable=invalid-name self, original_action: torch.Tensor, f: np.ndarray, @@ -103,9 +112,7 @@ def control_barrier( x: np.ndarray, std: np.ndarray, ) -> torch.Tensor: - """ - Adjusts the original action using a control barrier function to ensure - that the action complies with the system's physical constraints. + """Adjusts the original action using a control barrier function. Args: original_action (torch.Tensor): The original action proposed by the RL algorithm. @@ -117,7 +124,6 @@ def control_barrier( Returns: torch.Tensor: The adjusted action that respects the system's constraints. 
""" - # Define gamma for the barrier function gamma_b = 0.5 kd = 1.5 @@ -196,10 +202,9 @@ def control_barrier( return torch.as_tensor(u_bar[0], dtype=torch.float32, device=self._device).unsqueeze(dim=0) + # pylint: disable-next=attribute-defined-outside-init,import-outside-toplevel,invalid-name def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: - """ - Calculates the dynamics of the system based on the current observation - and the original action. + """Calculates the dynamics of the system. Args: obs (list[float]): The current observation of the system state. @@ -208,7 +213,6 @@ def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: Returns: np.ndarray: The calculated dynamics of the system. """ - dt = 0.05 # Time step G = 10 # Gravitational constant m = 2 # Mass @@ -233,8 +237,7 @@ def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: return np.squeeze(f) def update_gp_dynamics(self, obs: np.ndarray, act: np.ndarray) -> None: - """ - Updates the Gaussian Process (GP) dynamics model based on observed states and actions. + """Updates the Gaussian Process (GP) dynamics model based on observed states and actions. Args: obs (np.ndarray): Observed states. @@ -260,8 +263,7 @@ def update_gp_dynamics(self, obs: np.ndarray, act: np.ndarray) -> None: self.gp_model[1].fit(S, err[:, 1]) def get_gp_dynamics(self, obs: torch.Tensor, use_prev_model: bool) -> list[np.ndarray]: - """ - Retrieves the gp dynamics based on the current observation. + """Retrieves the gp dynamics based on the current observation. Args: obs (torch.Tensor): Current state observation. 
@@ -278,7 +280,7 @@ def get_gp_dynamics(self, obs: torch.Tensor, use_prev_model: bool) -> list[np.nd obs = np.squeeze(obs) theta = np.arctan2(obs[1], obs[0]) theta_dot = obs[2] - x = np.array([theta, theta_dot]) # 这个x估计就对应state + x = np.array([theta, theta_dot]) f_nom = np.array( [ -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2 @@ -307,3 +309,8 @@ def get_gp_dynamics(self, obs: torch.Tensor, use_prev_model: bool) -> list[np.nd np.squeeze(x), np.array([np.squeeze(std1), np.squeeze(std2)]), ] + + def reset_gp_model(self) -> None: + """Reset the gaussian processing model of barrier function solver.""" + self.gp_model_prev = self.gp_model.copy() + self.build_gp_model() diff --git a/omnisafe/common/robust_barrier_solver.py b/omnisafe/common/robust_barrier_solver.py index 639ae8d3a..3e14d002c 100644 --- a/omnisafe/common/robust_barrier_solver.py +++ b/omnisafe/common/robust_barrier_solver.py @@ -1,19 +1,46 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+"""Robust Control Barrier Function Solver for OmniSafe."""
+
+
+# mypy: ignore-errors
+# pylint: disable=invalid-name,wrong-spelling-in-docstring
 from __future__ import annotations
 
 from typing import Any
 
 import gymnasium as gym
-import numpy as np
 import torch
 from qpth.qp import QPFunction
 
-from omnisafe.common.utils import sort_vertices_cclockwise, to_tensor
+from omnisafe.utils.tools import to_tensor
 
 
 DYNAMICS_MODE = {'Unicycle': {'n_s': 3, 'n_u': 2}}
 
 
 class CBFQPLayer:
+    """CBFQPLayer for robust control barrier function solver.
+
+    Args:
+        env (gym.Env): The Gym environment to interact with.
+        device (str, optional): The device type, such as 'cpu' or 'gpu'. Defaults to 'cpu'.
+        gamma_b (float, optional): The gamma parameter. Defaults to 20.
+        k_d (float, optional): The confidence parameter desired. Defaults to 3.0.
+        l_p (float, optional): Some additional layer parameter, purpose unspecified. Defaults to 0.03.
+    """
 
     def __init__(
         self,
@@ -23,15 +50,7 @@ def __init__(
         k_d: float = 3.0,
         l_p: float = 0.03,
     ) -> None:
-        """Initializes a CBFLayer instance with specified parameters and environment.
-
-        Args:
-            env (gym.Env): The Gym environment to interact with.
-            device (str, optional): The device type, such as 'cpu' or 'gpu'. Defaults to 'cpu'.
-            gamma_b (float, optional): The gamma parameter of the control barrier certificate. Defaults to 20.
-            k_d (float, optional): The confidence parameter desired (e.g., 2.0 corresponds to ~95% confidence). Defaults to 3.0.
-            l_p (float, optional): Some additional layer parameter, purpose unspecified. Defaults to 0.03.
-        """
+        """Initializes a CBFLayer instance with specified parameters and environment."""
         self.device = torch.device(device)
         self.env = env
         self.u_min, self.u_max = self.get_control_bounds()
@@ -54,7 +73,6 @@ def get_safe_action(
         action_batch (torch.Tensor): Nominal action batch, tensor or ndarray.
mean_pred_batch (torch.Tensor): Mean disturbance predictions, tensor or ndarray. sigma_batch (torch.Tensor): Standard deviations of disturbances, tensor or ndarray. - cbf_info_batch (torch.Tensor, optional): Additional control barrier function information batch, tensor or ndarray. Returns: torch.Tensor: Safe actions adjusted for given constraints and uncertainties. @@ -96,15 +114,14 @@ def solve_qp( subject to G[u,eps]^T <= h Args: - Ps (torch.Tensor): Quadratic cost matrix for each problem, with shape (batch_size, n_u+1, n_u+1). - qs (torch.Tensor): Linear cost vector for each problem, with shape (batch_size, n_u+1). - Gs (torch.Tensor): Inequality constraint matrix for each problem, with shape (batch_size, num_ineq_constraints, n_u+1). - hs (torch.Tensor): Inequality constraint vector for each problem, with shape (batch_size, num_ineq_constraints). + Ps (torch.Tensor): Quadratic cost matrix for each problem. + qs (torch.Tensor): Linear cost vector for each problem. + Gs (torch.Tensor): Inequality constraint matrix for each problem. + hs (torch.Tensor): Inequality constraint vector for each problem. Returns: The safe action for each problem, omitting the slack variable, with dimension (batch_size, n_u). """ - Ghs = torch.cat((Gs, hs.unsqueeze(2)), -1) Ghs_norm = torch.max(torch.abs(Ghs), dim=2, keepdim=True)[0] Gs /= Ghs_norm @@ -139,8 +156,8 @@ def cbf_layer( Args: Qs (torch.Tensor): Quadratic cost matrix for each problem. ps (torch.Tensor): Linear cost vector for each problem. - Gs (torch.Tensor): Inequality constraint matrix for each problem, shape (batch_size, num_ineq_constraints, num_vars). - hs (torch.Tensor): Inequality constraint vector for each problem, shape (batch_size, num_ineq_constraints). + Gs (torch.Tensor): Inequality constraint matrix for each problem. + hs (torch.Tensor): Inequality constraint vector for each problem. As (torch.Tensor, optional): Equality constraint matrix. Defaults to None. 
bs (torch.Tensor, optional): Equality constraint vector. Defaults to None. solver_args (dict, optional): Dictionary of solver arguments. Defaults to None. @@ -148,7 +165,6 @@ def cbf_layer( Returns: Result of the QP solver for each problem. """ - if solver_args is None: solver_args = {} @@ -165,6 +181,7 @@ def cbf_layer( bs, ).float() + # pylint: disable-next=too-many-locals def get_cbf_qp_constraints( self, state_batch: torch.Tensor, @@ -180,10 +197,10 @@ def get_cbf_qp_constraints( subject to G[u,eps]^T <= h Args: - state_batch (torch.Tensor): Current state batch. Refer to `dynamics.py` for specifics on each dynamic. + state_batch (torch.Tensor): Current state batch. action_batch (torch.Tensor): Nominal control input batch. - mean_pred_batch (torch.Tensor): Mean disturbance prediction state batch, dimensions (n_s, n_u). - sigma_pred_batch (torch.Tensor): Standard deviation of the additive disturbance after undergoing the output dynamics. + mean_pred_batch (torch.Tensor): Mean disturbance prediction state batch. + sigma_pred_batch (torch.Tensor): Standard deviation of the additive disturbance. gamma_b (float, optional): CBF parameter for the class-Kappa function. Defaults to 1.0. 
Returns: @@ -246,65 +263,15 @@ def get_cbf_qp_constraints( hs = 1e3 * torch.ones((batch_size, num_cbfs), device=self.device) dhdps = torch.zeros((batch_size, num_cbfs, 2), device=self.device) hazards = self.env.hazards - for i in range(len(hazards)): - if hazards[i]['type'] == 'circle': - obs_loc = to_tensor(hazards[i]['location'], torch.FloatTensor, self.device) + for i, hazard in enumerate(hazards): + if hazard['type'] == 'circle': + obs_loc = to_tensor(hazard['location'], torch.FloatTensor, self.device) hs[:, i] = 0.5 * ( - torch.sum((ps - obs_loc) ** 2, dim=1) - (hazards[i]['radius'] + buffer) ** 2 + torch.sum((ps - obs_loc) ** 2, dim=1) - (hazard['radius'] + buffer) ** 2 ) dhdps[:, i, :] = ps - obs_loc - elif hazards[i]['type'] == 'polygon': - vertices = sort_vertices_cclockwise(hazards[i]['vertices']) - segments = np.diff(vertices, axis=0, append=vertices[[0]]) - segments = to_tensor(segments, torch.FloatTensor, self.device) - vertices = to_tensor(vertices, torch.FloatTensor, self.device) - for j in range(segments.shape[0]): - dot_products = torch.matmul( - ps - vertices[j : j + 1], - segments[j], - ) / torch.sum(segments[j] ** 2) - mask0_ = dot_products < 0 - mask1_ = dot_products > 1 - mask_ = torch.logical_and(dot_products >= 0, dot_products <= 1) - dists2seg = torch.zeros(batch_size) - if mask0_.sum() > 0: - dists2seg[mask0_] = torch.linalg.norm(ps[mask0_] - vertices[[j]], dim=1) - if mask1_.sum() > 0: - dists2seg[mask1_] = torch.linalg.norm( - ps[mask1_] - vertices[[(j + 1) % segments.shape[0]]], - dim=1, - ) - if mask_.sum() > 0: - dists2seg[mask_] = torch.linalg.norm( - dot_products[mask_, None] * segments[j].tile((torch.sum(mask_), 1)) - + vertices[[j]] - - ps[mask_], - dim=1, - ) - hs_ = 0.5 * ((dists2seg**2) + 0.5 * buffer) - dhdps_ = torch.zeros((batch_size, 2)) - if mask0_.sum() > 0: - dhdps_[mask0_] = ps[mask0_] - vertices[[j]] - if mask1_.sum() > 0: - dhdps_[mask1_] = ps[mask1_] - vertices[[(j + 1) % segments.shape[0]]] - if mask_.sum() > 0: - 
normal_vec = torch.tensor([segments[j][1], -segments[j][0]]) - normal_vec /= torch.linalg.norm(normal_vec) - dhdps_[mask_] = (ps[mask_] - vertices[j]).matmul( - normal_vec, - ) * normal_vec.view((1, 2)).repeat(torch.sum(mask_), 1) - idxs_to_update = torch.nonzero(hs[:, i] - hs_ > 0) - # Update the actual hs to be used in the constraints - if idxs_to_update.shape[0] > 0: - hs[idxs_to_update, i] = hs_[idxs_to_update] - # Compute dhdhps for those indices - dhdps[idxs_to_update, i, :] = dhdps_[idxs_to_update, :] else: - raise Exception( - 'Only obstacles of type `circle` or `polygon` are supported, got: {}'.format( - hazards[i]['type'], - ), - ) + raise NotImplementedError n_u = action_batch.shape[1] num_constraints = num_cbfs + 2 * n_u @@ -345,12 +312,11 @@ def get_cbf_qp_constraints( return P, q, G, h def get_control_bounds(self) -> tuple[torch.Tensor, torch.Tensor]: - """ + """Obtain the action bounds. Returns: Action bounds, i.e., min control input and max control input. """ - u_min = torch.tensor(self.env.safe_action_space.low).to(self.device) u_max = torch.tensor(self.env.safe_action_space.high).to(self.device) diff --git a/omnisafe/common/robust_gp_model.py b/omnisafe/common/robust_gp_model.py index 3380d1f2d..885a50389 100644 --- a/omnisafe/common/robust_gp_model.py +++ b/omnisafe/common/robust_gp_model.py @@ -1,3 +1,21 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Implementation of Dynamics Model Based on GPyTorch.""" +# mypy: ignore-errors + + from __future__ import annotations import os @@ -14,8 +32,8 @@ from gpytorch.means import ZeroMean from gpytorch.priors import NormalPrior -from omnisafe.common.utils import to_numpy, to_tensor from omnisafe.typing import DEVICE_CPU +from omnisafe.utils.tools import to_tensor DYNAMICS_MODE = {'Unicycle': {'n_s': 3, 'n_u': 2}} @@ -23,8 +41,7 @@ class BaseGPy(gpytorch.models.ExactGP): - """ - A Gaussian Process (GP) model using a zero mean function and a scaled RBF kernel with priors. + """A Gaussian Process (GP) model using a zero mean function and a scaled RBF kernel with priors. This class extends gpytorch.models.ExactGP, specifically designed for use in disturbance estimation tasks. @@ -57,6 +74,7 @@ def __init__( self.covar_module.base_kernel.lengthscale = 1e5 self.covar_module.outputscale = prior_std + 1e-6 + # pylint: disable=arguments-differ def forward(self, x: torch.Tensor) -> MultivariateNormal: """Forward pass through the GP model to produce a multivariate normal distribution. @@ -85,7 +103,7 @@ class GPyDisturbanceEstimator: train_x (torch.Tensor): Training data features. If not a tensor, it will be converted. train_y (torch.Tensor): Training data targets. If not a tensor, it will be converted. prior_std (float): Standard deviation of the prior distribution. - likelihood (Optional[gpytorch.likelihoods.Likelihood]): A GPyTorch likelihood. If None, a default GaussianLikelihood is used. + likelihood (Optional[gpytorch.likelihoods.Likelihood]): A GPyTorch likelihood. device (Optional[torch.device]): The torch device. Defaults to CPU if None. 
""" @@ -97,6 +115,7 @@ def __init__( likelihood: gpytorch.likelihoods.Likelihood | None = None, device: torch.device = DEVICE_CPU, ) -> None: + """Initialize the GPyDisturbanceEstimator.""" self.device = device if device else torch.device('cpu') if not torch.is_tensor(train_x): @@ -140,8 +159,7 @@ def train(self, training_iter: int, verbose: bool = False) -> None: optimizer.step() def predict(self, test_x: torch.Tensor) -> dict[str, torch.Tensor | np.ndarray]: - """ - Makes predictions on new data. + """Makes predictions on new data. Args: test_x (torch.Tensor): Test data features. If not a tensor, it will be converted. @@ -174,6 +192,7 @@ def predict(self, test_x: torch.Tensor) -> dict[str, torch.Tensor | np.ndarray]: return pred_dict +# pylint: disable-next=too-many-instance-attributes class DynamicsModel: """Initializes the DynamicsModel with a gym environment. @@ -191,6 +210,7 @@ def __init__( l_p: float = 0.03, device: str = 'cpu', ) -> None: + """Initializes the DynamicsModel with a gym environment.""" self.env = env self.get_f, self.get_g = self.get_dynamics() self.n_s = DYNAMICS_MODE[self.env.dynamics_mode]['n_s'] @@ -209,8 +229,7 @@ def __init__( self.device = torch.device(device) def predict_next_state(self, state_batch: np.ndarray, u_batch: np.ndarray) -> np.ndarray: - """ - Predicts the next state given the current state and action batch. + """Predicts the next state given the current state and action batch. Args: state_batch (np.ndarray): The batch of current states. @@ -241,7 +260,7 @@ def get_dynamics(self) -> tuple[Callable, Callable]: """Retrieves the dynamics functions for drift and control based on the environment's dynamics mode. Returns: - tuple: A tuple containing two callables, `get_f` and `get_g`, which compute the drift and control dynamics respectively. + tuple: A tuple containing two callable methods, `get_f` and `get_g`. 
""" if self.env.dynamics_mode == 'Unicycle': @@ -257,27 +276,23 @@ def get_g(state_batch: np.ndarray) -> np.ndarray: return g_x else: - raise Exception('Unknown Dynamics mode.') + raise NotImplementedError('Unknown Dynamics mode.') return get_f, get_g - def get_state(self, obs: np.ndarray) -> np.ndarray: - """ - Processes the raw observations from the environment and returns the corresponding state representation. + def get_state(self, obs: torch.Tensor) -> torch.Tensor: + """Processes the raw observations from the environment. Args: - obs (np.ndarray): The environment observations. + obs (torch.Tensor): The environment observations. Returns: - np.ndarray: The processed state of the system. + torch.Tensor: The processed state of the system. """ expand_dims = len(obs.shape) == 1 - is_tensor = torch.is_tensor(obs) - - if is_tensor: - dtype = obs.dtype - device = obs.device - obs = obs.cpu().numpy() if obs.is_cuda else obs.numpy() + dtype = obs.dtype + device = obs.device + obs = obs.cpu().numpy() if obs.is_cuda else obs.numpy() if expand_dims: obs = np.expand_dims(obs, 0) @@ -289,14 +304,12 @@ def get_state(self, obs: np.ndarray) -> np.ndarray: state_batch[:, 1] = obs[:, 1] state_batch[:, 2] = theta else: - raise Exception('Unknown dynamics') + raise NotImplementedError('Unknown dynamics') if expand_dims: state_batch = state_batch.squeeze(0) - if is_tensor: - return torch.tensor(state_batch, dtype=dtype, device=device) - return state_batch + return torch.tensor(state_batch, dtype=dtype, device=device) def append_transition( self, @@ -371,23 +384,18 @@ def fit_gp_model(self, training_iter: int = 70) -> None: self._train_x = train_x self._train_y = train_y - def predict_disturbance(self, test_x: np.ndarray) -> tuple: + def predict_disturbance(self, test_x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: """Predicts the disturbance at the queried states using the trained Gaussian Process models. 
Args: - test_x (np.ndarray): The state for which to predict disturbances, shape (n_test, n_s). + test_x (torch.Tensor): The state for which to predict disturbances, shape (n_test, n_s). Returns: - tuple: A tuple of arrays (means, variances) where means is the predicted mean disturbance - and variances is the corresponding variance, shape (n_test, n_s). + tuple: A tuple of arrays (means, variances). """ - - is_tensor = torch.is_tensor(test_x) - - if is_tensor: - dtype = test_x.dtype - device = test_x.device - test_x = to_numpy(test_x) + dtype = test_x.dtype + device = test_x.device + test_x = test_x.cpu().detach().double().numpy() expand_dims = len(test_x.shape) == 1 if expand_dims: @@ -414,11 +422,7 @@ def predict_disturbance(self, test_x: np.ndarray) -> tuple: means = means.squeeze(0) f_std = f_std.squeeze(0) - return ( - (to_tensor(means, dtype, device), to_tensor(f_std, dtype, device)) - if is_tensor - else (means, f_std) - ) + return (to_tensor(means, dtype, device), to_tensor(f_std, dtype, device)) def load_disturbance_models(self, save_dir: str, epoch: str) -> None: """Loads the disturbance models and their training data. diff --git a/omnisafe/common/utils.py b/omnisafe/common/utils.py deleted file mode 100644 index ec36fe157..000000000 --- a/omnisafe/common/utils.py +++ /dev/null @@ -1,55 +0,0 @@ -import numpy as np -import torch - - -def to_numpy(x: torch.Tensor) -> np.ndarray: - """Convert a torch tensor to a numpy array. - - Args: - x (torch.Tensor): A torch tensor to be converted. - - Returns: - np.ndarray: A numpy array representation of the input tensor. - """ - return x.cpu().detach().double().numpy() - - -def to_tensor( - x: np.ndarray, - dtype: torch.dtype, - device: torch.device, - requires_grad: bool = False, -) -> torch.Tensor: - """Convert a numpy array to a torch tensor of specified type and device. - - Args: - x (np.ndarray): A numpy array to be converted. - dtype (torch.dtype): The desired data type for the tensor. 
- device (torch.device): The device to store the tensor on. - requires_grad (bool): If True, gradients will be computed for operations involving this tensor. - - Returns: - torch.Tensor: A torch tensor representation of the input array. - """ - if type(x).__module__ != 'numpy': - return x - return torch.from_numpy(x).type(dtype).to(device).requires_grad_(requires_grad) - - -def sort_vertices_cclockwise(vertices: np.ndarray) -> np.ndarray: - """Sort vertices of a 2D convex polygon in counter-clockwise direction. - - Args: - vertices (np.ndarray): An array of shape (n_v, 2) where n_v is the number of vertices. - - Returns: - np.ndarray: An array of vertices sorted in counter-clockwise direction. - """ - assert vertices.shape[1] == 2, f'Vertices must each have dimension 2, got {vertices.shape[1]}' - - # Sort vertices - polygon_center = vertices.sum(axis=0, keepdims=True) / vertices.shape[0] # (1, d) - rel_vecs = vertices - polygon_center - thetas = np.arctan2(rel_vecs[:, 1], rel_vecs[:, 0]) - idxs = np.argsort(thetas) - return vertices[idxs, :] diff --git a/omnisafe/configs/on-policy/TRPOCBF.yaml b/omnisafe/configs/on-policy/TRPOCBF.yaml index 8fecee0d4..9d1b67ec0 100644 --- a/omnisafe/configs/on-policy/TRPOCBF.yaml +++ b/omnisafe/configs/on-policy/TRPOCBF.yaml @@ -82,6 +82,8 @@ defaults: fvp_obs: None # The sub-sampling rate of the observation fvp_sample_freq: 1 + # The max steps to update dynamics model + update_dynamics_steps: 650 # logger configurations logger_cfgs: # use wandb for logging @@ -91,7 +93,7 @@ defaults: # use tensorboard for logging use_tensorboard: True # save model frequency - save_model_freq: 100 + save_model_freq: 10 # save logger path log_dir: "./runs" # save model path diff --git a/omnisafe/envs/barrier_function_env.py b/omnisafe/envs/barrier_function_env.py index d664e749b..01477c1fe 100644 --- a/omnisafe/envs/barrier_function_env.py +++ b/omnisafe/envs/barrier_function_env.py @@ -33,8 +33,8 @@ class BarrierFunctionEnv(CMDP): """Interface of 
control barrier function-based environments. .. warning:: - Since environments based on control barrier functions require special judgment and control of environmental dynamics, - they do not support the use of vectorized environments for parallelization. + Since environments based on control barrier functions require special judgment and control + of environmental dynamics, they do not support the use of vectorized environments. Attributes: need_auto_reset_wrapper (bool): Whether to use auto reset wrapper. @@ -84,7 +84,7 @@ def __init__( else: raise NotImplementedError('Only support num_envs=1 now.') self._device = torch.device(device) - self._episodic_violation = [] + self._episodic_violation: list[float] = [] self._num_envs = num_envs self._metadata = self._env.metadata self.env_spec_log = {'Metrics/Max_angle_violation': 0.0} @@ -96,17 +96,16 @@ def _env_specific_setting(self) -> None: We have organized these adjustments and encapsulated them in this function. """ if self._env_id == 'Pendulum-v1': - self._env.unwrapped.max_torque = 15.0 - self._env.unwrapped.max_speed = 60.0 + self._env.unwrapped.max_torque = 15.0 # type: ignore + self._env.unwrapped.max_speed = 60.0 # type: ignore self._env.unwrapped.action_space = spaces.Box( - low=-self._env.unwrapped.max_torque, - high=self._env.unwrapped.max_torque, + low=-self._env.unwrapped.max_torque, # type: ignore + high=self._env.unwrapped.max_torque, # type: ignore shape=(1,), ) - high = np.array([1.0, 1.0, self._env.unwrapped.max_speed]) + high = np.array([1.0, 1.0, self._env.unwrapped.max_speed]) # type: ignore self._env.unwrapped.observation_space = spaces.Box(low=-high, high=high) - self._env.dt = 0.05 - self._env.dynamics_mode = 'Pendulum' + self._env.dt = 0.05 # type: ignore def step( self, @@ -146,7 +145,7 @@ def step( for x in (obs, reward, terminated, truncated) ) cost = torch.abs(torch.atan2(obs[1], obs[0])).to(self._device) - self._episodic_violation.append(cost) + 
self._episodic_violation.append(cost.item()) if 'final_observation' in info: info['final_observation'] = np.array( @@ -194,7 +193,7 @@ def reset( """ obs, info = self._env.reset(seed=seed, options=options) if self._env_id == 'Pendulum-v1': - while self._env.unwrapped.state[0] > 1.0 or self._env.unwrapped.state[0] < -1.0: + while self._env.unwrapped.state[0] > 1.0 or self._env.unwrapped.state[0] < -1.0: # type: ignore obs, info = self._env.reset(options=options) return torch.as_tensor(obs, dtype=torch.float32, device=self._device), info @@ -220,4 +219,5 @@ def close(self) -> None: @property def unwrapped(self) -> gymnasium.Env: + """Return the original interface of environment.""" return self._env.unwrapped diff --git a/omnisafe/envs/robust_barrier_function_env.py b/omnisafe/envs/robust_barrier_function_env.py index 1f1c10418..9bce446ce 100644 --- a/omnisafe/envs/robust_barrier_function_env.py +++ b/omnisafe/envs/robust_barrier_function_env.py @@ -18,10 +18,8 @@ from typing import Any, ClassVar -import gymnasium import numpy as np import torch -from gymnasium import spaces from omnisafe.envs.core import CMDP, env_register from omnisafe.envs.unicycle_env import UnicycleEnv @@ -33,8 +31,9 @@ class RobustBarrierFunctionEnv(CMDP): """Interface of control barrier function-based environments. .. warning:: - Since environments based on control barrier functions require special judgment and control of environmental dynamics, - they do not support the use of vectorized environments for parallelization. + Since environments based on control barrier functions require special judgment and control + of environmental dynamics, they do not support the use of vectorized environments for + parallelization. Attributes: need_auto_reset_wrapper (bool): Whether to use auto reset wrapper. @@ -168,17 +167,6 @@ def set_seed(self, seed: int) -> None: """ self.reset(seed=seed) - def sample_action(self) -> torch.Tensor: - """Sample a random action. - - Returns: - A random action. 
- """ - return torch.normal( - torch.zeros(self.action_space.shape), - torch.ones(self.action_space.shape), - ) - def render(self) -> Any: """Render the environment. @@ -192,4 +180,5 @@ def close(self) -> None: self._env.close() def __getattr__(self, name: str) -> Any: + """Return the unwrapped environment attributes.""" return getattr(self._env, name) diff --git a/omnisafe/envs/unicycle_env.py b/omnisafe/envs/unicycle_env.py index 4fca58eed..dd0515fba 100644 --- a/omnisafe/envs/unicycle_env.py +++ b/omnisafe/envs/unicycle_env.py @@ -1,3 +1,5 @@ +# pylint: disable=all +# mypy: ignore-errors from __future__ import annotations from collections.abc import Iterable @@ -9,15 +11,6 @@ def to_pixel(meas_cm: list[float] | float, shift: int = 0) -> float: - """Convert measurements from centimeters to pixels. - - Args: - meas_cm (list[float] | float): A single measurement or a list of measurements in centimeters. - shift (int, optional): An integer value that is added to the converted measurement(s). Default is 0. - - Returns: - float | np.ndarray: The measurement converted to pixels. - """ if isinstance(meas_cm, Iterable): return 1.5 * 37.795 * meas_cm + np.array(shift) @@ -25,7 +18,6 @@ def to_pixel(meas_cm: list[float] | float, shift: int = 0) -> float: class UnicycleEnv(gym.Env): - """Custom Environment that follows SafetyGym interface""" def __init__(self) -> None: @@ -84,41 +76,11 @@ def step( self, action: np.ndarray, ) -> tuple[np.ndarray, float, float, bool, bool, dict[str, Any]]: - """ - Advance the environment state based on the action taken by the agent. - - Parameters: - action(np.ndarray): Control action taken by the agent. - - Returns: - A tuple containing: - - new_obs : np.ndarray, the new observation structured as [pos_x, pos_y, cos(theta), sin(theta), xdir2goal, ydir2goal, dist2goal]. - - reward : float, reward received after taking the action. - - cost : float, cost incurred after taking the action. 
- - terminated : bool, whether the episode has terminated. - - truncated : bool, whether the episode was truncated. - - info : dict, additional information about the environment's state. - """ action = np.clip(action, -1.0, 1.0) state, reward, cost, terminated, truncated, info = self._step(action) return self.get_obs(), reward, cost, terminated, truncated, info def _step(self, action: np.ndarray) -> tuple: - """ - Update the internal state based on the action, considering dynamics and disturbances. - - Parameters: - action(np.ndarray): Control action taken by the agent. - - Returns: - A tuple containing: - - state : np.ndarray, new internal state of the agent. - - reward : float, reward collected during this transition. - - cost : float, cost incurred during this transition. - - terminated : bool, whether the episode has terminated. - - truncated : bool, whether the episode was truncated due to reaching a step limit. - - info : dict, additional information relevant to the environment. - """ self.state += self.dt * (self.get_f(self.state) + self.get_g(self.state) @ action) self.state -= self.dt * 0.1 * self.get_g(self.state) @ np.array([np.cos(self.state[2]), 0]) @@ -143,29 +105,9 @@ def _step(self, action: np.ndarray) -> tuple: return self.state, reward, cost, terminated, truncated, {} def goal_met(self) -> bool: - """ - Check if the current goal has been met in this step. - - Returns: - True if the agent has reached the goal, False otherwise. - """ return np.linalg.norm(self.state[:2] - self.goal_pos) <= self.goal_size def reset(self, seed: int | None = None, options: dict | None = None) -> tuple: - """ - Reset the environment to an initial state. - - Parameters: - seed : int, optional - Seed for random number generator. - options : dict, optional - Additional options to customize the environment reset. - - Returns: - A tuple containing: - - observation : np.ndarray, the first observation after reset. - - info : dict, additional information about the reset state. 
- """ self.episode_step = 0 if self.rand_init: @@ -178,16 +120,6 @@ def reset(self, seed: int | None = None, options: dict | None = None) -> tuple: return self.get_obs(), {} def render(self, mode: str = 'human') -> np.ndarray: - """Render the environment to the screen - - Parameters:--- - mode : str - close : bool - - Returns: - - """ - if mode != 'human' and mode != 'rgb_array': rel_loc = self.goal_pos - self.state[:2] theta_error = np.arctan2(rel_loc[1], rel_loc[0]) - self.state[2] @@ -297,17 +229,6 @@ def get_obs(self) -> np.ndarray: ) def _get_dynamics(self) -> tuple[Callable, Callable]: - """Get affine Control Barrier Function (CBF) dynamics for a given environment. - - This method provides access to the system's drift and control dynamics, formulated for continuous systems of the form x' = f(x) + g(x)u, where 'x' is the state vector and 'u' is the control vector. - - Returns: - get_f : Callable[[np.ndarray], np.ndarray] - Function to compute the drift dynamics 'f(x)' of the system. - - get_g : Callable[[np.ndarray], np.ndarray] - Function to compute the control dynamics 'g(x)' of the system. - """ def get_f(state: np.ndarray) -> np.ndarray: """Function to compute the drift dynamics 'f(x)' of the system.""" @@ -321,15 +242,6 @@ def get_g(state: np.ndarray) -> np.ndarray: return get_f, get_g def obs_compass(self) -> np.ndarray: - """ - Return a robot-centric compass observation of a list of positions. - Compass is a normalized (unit-lenght) egocentric XY vector, - from the agent to the object. - This is equivalent to observing the egocentric XY angle to the target, - projected into the sin/cos space we use for joints. - (See comment on joint observation for why we do this.) 
- """ - # Get ego vector in world frame vec = self.goal_pos - self.state[:2] # Rotate into frame @@ -351,91 +263,3 @@ def close(self) -> None: if self.viewer: self.viewer.close() self.viewer = None - - def get_random_hazard_locations(self, n_hazards: int, hazard_radius: float) -> None: - """ - - Parameters:--- - n_hazards : int - Number of hazards to create - hazard_radius : float - Radius of hazards - - Returns: - hazards_locs : np.ndarray - Numpy array of shape (n_hazards, 2) containing xy locations of hazards. - """ - - # Create buffer with boundaries - buffered_bds = np.copy(self.bds) - buffered_bds[0] = buffered_bds[0] + hazard_radius - buffered_bds[1] -= hazard_radius - - hazards = [] - hazards_centers = np.zeros((n_hazards, 2)) - n = 0 # Number of hazards actually placed - for _ in range(n_hazards): - successfully_placed = False - iteration = 0 - hazard_type = np.random.randint(3) # 0-> Circle 1->Square 2->Triangle - radius = hazard_radius * (1 - 0.2 * 2.0 * (np.random.random() - 0.5)) - while not successfully_placed and iteration < 100: - hazards_centers[n] = (buffered_bds[1] - buffered_bds[0]) * np.random.random( - 2, - ) + buffered_bds[0] - successfully_placed = np.all( - np.linalg.norm(hazards_centers[:n] - hazards_centers[[n]], axis=1) - > 3.5 * hazard_radius, - ) - successfully_placed = np.logical_and( - successfully_placed, - np.linalg.norm(self.goal_pos - hazards_centers[n]) > 2.0 * hazard_radius, - ) - successfully_placed = np.logical_and( - successfully_placed, - np.all( - np.linalg.norm(self.initial_state[:, :2] - hazards_centers[[n]], axis=1) - > 2.0 * hazard_radius, - ), - ) - iteration += 1 - if not successfully_placed: - continue - if hazard_type == 0: # Circle - hazards.append({'type': 'circle', 'location': hazards_centers[n], 'radius': radius}) - elif hazard_type == 1: # Square - hazards.append( - { - 'type': 'polygon', - 'vertices': np.array( - [ - [-radius, -radius], - [-radius, radius], - [radius, radius], - [radius, -radius], - ], - ), - 
},
-                )
-                hazards[-1]['vertices'] += hazards_centers[n]
-            else:  # Triangle
-                hazards.append(
-                    {
-                        'type': 'polygon',
-                        'vertices': np.array(
-                            [
-                                [-radius, -radius],
-                                [-radius, radius],
-                                [radius, radius],
-                                [radius, -radius],
-                            ],
-                        ),
-                    },
-                )
-                # Pick a vertex and delete it
-                idx = np.random.randint(4)
-                hazards[-1]['vertices'] = np.delete(hazards[-1]['vertices'], idx, axis=0)
-                hazards[-1]['vertices'] += hazards_centers[n]
-            n += 1
-
-        self.hazards = hazards
diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py
index 2f17f852b..c94c38389 100644
--- a/omnisafe/evaluator.py
+++ b/omnisafe/evaluator.py
@@ -13,6 +13,8 @@
 # limitations under the License.
 # ==============================================================================
 """Implementation of Evaluator."""
+# mypy: ignore-errors
+
 
 from __future__ import annotations
 
@@ -47,6 +49,10 @@
 from omnisafe.common.control_barrier_function.crabs.optimizers import Barrier
 from omnisafe.common.control_barrier_function.crabs.utils import Normalizer as CRABSNormalizer
 from omnisafe.common.control_barrier_function.crabs.utils import create_model_and_trainer
+from omnisafe.common.barrier_comp import BarrierCompensator
+from omnisafe.common.barrier_solver import PendulumSolver
+from omnisafe.common.robust_barrier_solver import CBFQPLayer
+from omnisafe.common.robust_gp_model import DynamicsModel
 from omnisafe.envs.core import CMDP, make
 from omnisafe.envs.wrapper import ActionRepeat, ActionScale, ObsNormalize, TimeLimit
 from omnisafe.models.actor import ActorBuilder
@@ -94,6 +100,9 @@
         self._safety_obs = torch.ones(1)
         self._cost_count = torch.zeros(1)
         self.__set_render_mode(render_mode)
+        self._dynamics_model: DynamicsModel | None = None
+        self._solver: PendulumSolver | CBFQPLayer | None = None
+        
self._compensator = None def __set_render_mode(self, render_mode: str) -> None: """Set the render mode. @@ -130,7 +142,7 @@ def __load_cfgs(self, save_dir: str) -> None: self._dict_cfgs = kwargs self._cfgs = Config.dict2config(kwargs) - # pylint: disable-next=too-many-branches + # pylint: disable-next=attribute-defined-outside-init,import-outside-toplevel,too-many-branches,too-many-locals def __load_model_and_env( self, save_dir: str, @@ -302,9 +314,7 @@ def __load_model_and_env( self._actor = actor_builder.build_actor(actor_type) self._actor.load_state_dict(model_params['pi']) if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': - from omnisafe.common.barrier_comp import BarrierCompensator - - self.compensator = BarrierCompensator( + self._compensator = BarrierCompensator( obs_dim=observation_space.shape[0], act_dim=action_space.shape[0], cfgs=self._cfgs['compensator_cfgs'], @@ -316,21 +326,18 @@ def __load_model_and_env( raise FileNotFoundError( 'The model is not found in the save directory.', ) from error - self.compensator.load_state_dict(model_params['compensator']) + self._compensator.load_state_dict(model_params['compensator']) if self._cfgs['algo'] == 'SACRCBF': - from omnisafe.common.robust_barrier_solver import CBFQPLayer - from omnisafe.common.robust_gp_model import DynamicsModel - epoch = model_name.split('.pt')[0].split('-')[-1] - self.solver = CBFQPLayer( + self._solver = CBFQPLayer( env=self._env, device=self._cfgs['train_cfgs']['device'], gamma_b=self._cfgs['cbf_cfgs']['gamma_b'], k_d=self._cfgs['cbf_cfgs']['k_d'], l_p=self._cfgs['cbf_cfgs']['l_p'], ) - self.dynamics_model = DynamicsModel(env=self._env) - self.dynamics_model.load_disturbance_models( + self._dynamics_model = DynamicsModel(env=self._env) + self._dynamics_model.load_disturbance_models( save_dir=os.path.join(self._save_dir, 'gp_model_save'), epoch=epoch, ) @@ -417,15 +424,14 @@ def load_saved( self.__set_render_mode(render_mode) if self._cfgs['algo'] == 'DDPGCBF' or 
self._cfgs['algo'] == 'TRPOCBF': - from omnisafe.common.barrier_solver import PendulumSolver - self.solver = PendulumSolver() + self._solver = PendulumSolver() path = os.path.join( save_dir, 'gp_model_save', f'gaussian_process_regressor_{epoch}.pkl', ) - self.solver.build_gp_model(save_dir=path) + self._solver.build_gp_model(save_dir=path) env_kwargs = { 'env_id': self._cfgs['env_id'], @@ -441,6 +447,7 @@ def load_saved( self.__load_model_and_env(save_dir, model_name, env_kwargs) + # pylint: disable-next=too-many-locals def evaluate( self, num_episodes: int = 10, @@ -498,10 +505,10 @@ def evaluate( 'The policy must be provided or created before evaluating the agent.', ) if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': - approx_compensating_act = self.compensator(obs=obs) + approx_compensating_act = self._compensator(obs=obs) compensated_act_mean_raw = act + approx_compensating_act - [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=False) - compensating_act = self.solver.control_barrier( + [f, g, x, std] = self._solver.get_gp_dynamics(obs, use_prev_model=False) + compensating_act = self._solver.control_barrier( compensated_act_mean_raw, f, g, @@ -511,11 +518,11 @@ def evaluate( act = compensated_act_mean_raw + compensating_act if self._cfgs['algo'] == 'SACRCBF': - state_batch = self.dynamics_model.get_state(obs) - mean_pred_batch, sigma_pred_batch = self.dynamics_model.predict_disturbance( + state_batch = self._dynamics_model.get_state(obs) + mean_pred_batch, sigma_pred_batch = self._dynamics_model.predict_disturbance( state_batch, ) - safe_act = self.solver.get_safe_action( + safe_act = self._solver.get_safe_action( state_batch, act, mean_pred_batch, diff --git a/omnisafe/utils/tools.py b/omnisafe/utils/tools.py index 2c0c626eb..7c7a10ceb 100644 --- a/omnisafe/utils/tools.py +++ b/omnisafe/utils/tools.py @@ -356,3 +356,40 @@ def get_device(device: torch.device | str | int = DEVICE_CPU) -> torch.device: return 
torch.device('cpu') return device + + +def to_tensor( + x: np.ndarray, + dtype: torch.dtype, + device: torch.device, + requires_grad: bool = False, +) -> torch.Tensor: + """Convert a numpy array to a torch tensor of specified type and device. + + Args: + x (np.ndarray): A numpy array to be converted. + dtype (torch.dtype): The desired data type for the tensor. + device (torch.device): The device to store the tensor on. + requires_grad (bool): If True, gradients will be computed for operations involving this tensor. + + Returns: + torch.Tensor: A torch tensor representation of the input array. + """ + return torch.from_numpy(x).type(dtype).to(device).requires_grad_(requires_grad) + + +def sort_vertices_cclockwise(vertices: np.ndarray) -> np.ndarray: + """Sort vertices of a 2D convex polygon in counter-clockwise direction. + + Args: + vertices (np.ndarray): An array of shape (n_v, 2) where n_v is the number of vertices. + + Returns: + np.ndarray: An array of vertices sorted in counter-clockwise direction. 
+ """ + assert vertices.shape[1] == 2, f'Vertices must each have dimension 2, got {vertices.shape[1]}' + polygon_center = vertices.sum(axis=0, keepdims=True) / vertices.shape[0] # (1, d) + rel_vecs = vertices - polygon_center + thetas = np.arctan2(rel_vecs[:, 1], rel_vecs[:, 0]) + idxs = np.argsort(thetas) + return vertices[idxs, :] From 23f66d13dd38920c1785cc66b761d528760c0e4e Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Mon, 6 May 2024 15:13:50 +0800 Subject: [PATCH 04/18] chore: update pytest --- .pre-commit-config.yaml | 2 +- omnisafe/adapter/barrier_function_adapter.py | 28 +- .../adapter/beta_barrier_function_adapter.py | 18 +- .../offpolicy_barrier_function_adapter.py | 2 +- .../robust_barrier_function_adapter.py | 2 +- omnisafe/algorithms/off_policy/__init__.py | 15 +- omnisafe/algorithms/off_policy/ddpg_cbf.py | 3 - .../on_policy/barrier_function/trpo_cbf.py | 7 +- omnisafe/common/robust_barrier_solver.py | 2 + omnisafe/envs/__init__.py | 5 +- omnisafe/envs/classic_control/__init__.py | 3 + .../envs_from_cbf.py} | 16 +- .../envs_from_rcbf.py} | 188 ++++++++++++- omnisafe/envs/unicycle_env.py | 265 ------------------ omnisafe/evaluator.py | 30 +- tests/test_policy.py | 51 +++- 16 files changed, 333 insertions(+), 304 deletions(-) rename omnisafe/envs/{barrier_function_env.py => classic_control/envs_from_cbf.py} (95%) rename omnisafe/envs/{robust_barrier_function_env.py => classic_control/envs_from_rcbf.py} (51%) delete mode 100644 omnisafe/envs/unicycle_env.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 42e2956f9..63f378224 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -114,10 +114,10 @@ repos: ^tests/| ^setup.py$| ^omnisafe/envs/classic_control/envs_from_crabs.py$| + ^omnisafe/envs/classic_control/envs_from_rcbf.py| ^omnisafe/common/control_barrier_function/crabs/models.py$| ^omnisafe/common/control_barrier_function/crabs/optimizers.py$| 
^omnisafe/common/control_barrier_function/crabs/utils.py$| ^conftest.py$| - ^omnisafe/envs/unicycle_env.py| ^setup.py$ ) diff --git a/omnisafe/adapter/barrier_function_adapter.py b/omnisafe/adapter/barrier_function_adapter.py index a91218b48..469f4e7cd 100644 --- a/omnisafe/adapter/barrier_function_adapter.py +++ b/omnisafe/adapter/barrier_function_adapter.py @@ -71,18 +71,23 @@ def _wrapper( """ assert not obs_normalize, 'Barrier function does not support observation normalization!' if self._env.need_time_limit_wrapper: - self._env = TimeLimit(self._env, time_limit=1000, device=self._device) - self._eval_env = TimeLimit(self._eval_env, time_limit=1000, device=self._device) + assert ( + self._env.max_episode_steps + ), 'You must define max_episode_steps as an integer\ + \nor cancel the use of the time_limit wrapper.' + self._env = TimeLimit( + self._env, + time_limit=self._env.max_episode_steps, + device=self._device, + ) if self._env.need_auto_reset_wrapper: self._env = AutoReset(self._env, device=self._device) - self._eval_env = AutoReset(self._eval_env, device=self._device) if reward_normalize: self._env = RewardNormalize(self._env, device=self._device) if cost_normalize: self._env = CostNormalize(self._env, device=self._device) if self._env.num_envs == 1: self._env = Unsqueeze(self._env, device=self._device) - self._eval_env = Unsqueeze(self._eval_env, device=self._device) def set_solver(self, solver: PendulumSolver) -> None: """Set the barrier function solver for Pendulum environment.""" @@ -96,7 +101,7 @@ def reset_gp_model(self) -> None: """Reset the gaussian processing model of barrier function solver.""" self.solver.reset_gp_model() - def rollout( # pylint: disable=too-many-locals + def rollout( # pylint: disable=too-many-locals,too-many-branches self, steps_per_epoch: int, agent: ConstraintActorCritic, @@ -158,7 +163,6 @@ def rollout( # pylint: disable=too-many-locals self._log_value(reward=reward, cost=cost, info=info) logger.store({'Value/reward': 
value_r}) - logger.store({'Metrics/angle': cost}) buffer.store( obs=obs, @@ -174,15 +178,21 @@ def rollout( # pylint: disable=too-many-locals obs = next_obs epoch_end = step >= steps_per_epoch + + if epoch_end: + num_dones = int(terminated.contiguous().sum()) + if self._env.num_envs - num_dones: + logger.log( + f'\nWarning: trajectory cut off when rollout by epoch\ + in {self._env.num_envs - num_dones} of {self._env.num_envs} environments.', + ) + for idx, (done, time_out) in enumerate(zip(terminated, truncated)): if epoch_end or done or time_out: last_value_r = torch.zeros(1) last_value_c = torch.zeros(1) if not done: if epoch_end: - logger.log( - f'Warning: trajectory cut off when rollout by epoch at {self._ep_len[idx]} steps.', - ) _, last_value_r, last_value_c, _ = agent.step(obs[idx]) if time_out: _, last_value_r, last_value_c, _ = agent.step( diff --git a/omnisafe/adapter/beta_barrier_function_adapter.py b/omnisafe/adapter/beta_barrier_function_adapter.py index 844c0b4ce..d5738e02d 100644 --- a/omnisafe/adapter/beta_barrier_function_adapter.py +++ b/omnisafe/adapter/beta_barrier_function_adapter.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Barrier Function Adapter for OmniSafe.""" +"""Barrier Function Adapter with Beta Distribution for OmniSafe.""" from __future__ import annotations @@ -25,7 +25,7 @@ from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter from omnisafe.common.buffer import VectorOnPolicyBuffer from omnisafe.common.logger import Logger -from omnisafe.envs.wrapper import CostNormalize, RewardNormalize, Unsqueeze +from omnisafe.envs.wrapper import AutoReset, CostNormalize, RewardNormalize, TimeLimit, Unsqueeze from omnisafe.models.actor_critic.constraint_actor_critic import ConstraintActorCritic from omnisafe.utils.config import Config @@ -157,13 +157,24 @@ def _wrapper( cost_normalize (bool, optional): Whether to normalize the cost. Defaults to True. """ assert not obs_normalize, 'Barrier function does not support observation normalization!' + if self._env.need_time_limit_wrapper: + assert ( + self._env.max_episode_steps + ), 'You must define max_episode_steps as an integer\ + \nor cancel the use of the time_limit wrapper.' 
+ self._env = TimeLimit( + self._env, + time_limit=self._env.max_episode_steps, + device=self._device, + ) + if self._env.need_auto_reset_wrapper: + self._env = AutoReset(self._env, device=self._device) if reward_normalize: self._env = RewardNormalize(self._env, device=self._device) if cost_normalize: self._env = CostNormalize(self._env, device=self._device) if self._env.num_envs == 1: self._env = Unsqueeze(self._env, device=self._device) - self._eval_env = Unsqueeze(self._eval_env, device=self._device) def rollout( # pylint: disable=too-many-locals self, @@ -203,7 +214,6 @@ def rollout( # pylint: disable=too-many-locals if self._cfgs.algo_cfgs.use_cost: logger.store({'Value/cost': value_c}) logger.store({'Value/reward': value_r}) - logger.store({'Metrics/angle': info.get('original_cost', cost).cpu()}) buffer.store( obs=obs, diff --git a/omnisafe/adapter/offpolicy_barrier_function_adapter.py b/omnisafe/adapter/offpolicy_barrier_function_adapter.py index 49bf7909c..f40a7add9 100644 --- a/omnisafe/adapter/offpolicy_barrier_function_adapter.py +++ b/omnisafe/adapter/offpolicy_barrier_function_adapter.py @@ -68,7 +68,6 @@ def _wrapper( self._env = CostNormalize(self._env, device=self._device) if self._env.num_envs == 1: self._env = Unsqueeze(self._env, device=self._device) - self._eval_env = Unsqueeze(self._eval_env, device=self._device) def eval_policy( # pylint: disable=too-many-locals self, @@ -83,6 +82,7 @@ def eval_policy( # pylint: disable=too-many-locals agent (ConstraintActorCritic): Agent. logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``. 
""" + assert self._eval_env for _ in range(episode): ep_ret, ep_cost, ep_len = 0.0, 0.0, 0 obs, _ = self._eval_env.reset() diff --git a/omnisafe/adapter/robust_barrier_function_adapter.py b/omnisafe/adapter/robust_barrier_function_adapter.py index f56674319..8da2cf658 100644 --- a/omnisafe/adapter/robust_barrier_function_adapter.py +++ b/omnisafe/adapter/robust_barrier_function_adapter.py @@ -76,7 +76,6 @@ def _wrapper( self._env = CostNormalize(self._env, device=self._device) if self._env.num_envs == 1: self._env = Unsqueeze(self._env, device=self._device) - self._eval_env = Unsqueeze(self._eval_env, device=self._device) def set_solver(self, solver: CBFQPLayer) -> None: """Set the barrier function solver for Pendulum environment.""" @@ -101,6 +100,7 @@ def eval_policy( # pylint: disable=too-many-locals agent (ConstraintActorCritic): Agent. logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``. """ + assert self._eval_env for _ in range(episode): ep_ret, ep_cost, ep_len = 0.0, 0.0, 0 obs, _ = self._eval_env.reset() diff --git a/omnisafe/algorithms/off_policy/__init__.py b/omnisafe/algorithms/off_policy/__init__.py index 5a297c49f..1e14ebd26 100644 --- a/omnisafe/algorithms/off_policy/__init__.py +++ b/omnisafe/algorithms/off_policy/__init__.py @@ -28,4 +28,17 @@ from omnisafe.algorithms.off_policy.td3_pid import TD3PID -__all__ = ['DDPG', 'TD3', 'SAC', 'DDPGLag', 'TD3Lag', 'SACLag', 'DDPGPID', 'TD3PID', 'SACPID', 'SACRCBF', 'DDPGCBF', 'CRABS'] +__all__ = [ + 'DDPG', + 'TD3', + 'SAC', + 'DDPGLag', + 'TD3Lag', + 'SACLag', + 'DDPGPID', + 'TD3PID', + 'SACPID', + 'SACRCBF', + 'DDPGCBF', + 'CRABS', +] diff --git a/omnisafe/algorithms/off_policy/ddpg_cbf.py b/omnisafe/algorithms/off_policy/ddpg_cbf.py index 32b27be1d..de556372b 100644 --- a/omnisafe/algorithms/off_policy/ddpg_cbf.py +++ b/omnisafe/algorithms/off_policy/ddpg_cbf.py @@ -126,8 +126,5 @@ def _log_what_to_save(self) -> dict[str, Any]: what_to_save['pi'] = self._actor_critic.actor 
what_to_save['compensator'] = self._env.compensator - if self._cfgs.algo_cfgs.obs_normalize: - obs_normalizer = self._env.save()['obs_normalizer'] - what_to_save['obs_normalizer'] = obs_normalizer self._logger.setup_torch_saver(what_to_save) diff --git a/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py b/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py index 72238e41a..8125151d6 100644 --- a/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py +++ b/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py @@ -112,7 +112,6 @@ def _update(self) -> None: act, logp, target_value_r, - target_value_c, adv_r, adv_c, approx_compensating_act, @@ -122,7 +121,6 @@ def _update(self) -> None: data['act'], data['logp'], data['target_value_r'], - data['target_value_c'], data['adv_r'], data['adv_c'], data['approx_compensating_act'], @@ -136,7 +134,7 @@ def _update(self) -> None: compensating_act=compensating_act, ) dataloader = DataLoader( - dataset=TensorDataset(obs, target_value_r, target_value_c), + dataset=TensorDataset(obs, target_value_r), batch_size=self._cfgs.algo_cfgs.batch_size, shuffle=True, ) @@ -145,11 +143,8 @@ def _update(self) -> None: for ( obs, target_value_r, - target_value_c, ) in dataloader: self._update_reward_critic(obs, target_value_r) - if self._cfgs.algo_cfgs.use_cost: - self._update_cost_critic(obs, target_value_c) self._logger.store( { diff --git a/omnisafe/common/robust_barrier_solver.py b/omnisafe/common/robust_barrier_solver.py index 3e14d002c..62499352b 100644 --- a/omnisafe/common/robust_barrier_solver.py +++ b/omnisafe/common/robust_barrier_solver.py @@ -19,6 +19,7 @@ # pylint: disable=invalid-name,wrong-spelling-in-docstring from __future__ import annotations +import warnings from typing import Any import gymnasium as gym @@ -58,6 +59,7 @@ def __init__( self.k_d = k_d self.l_p = l_p self.action_dim = env.action_space.shape[0] + warnings.filterwarnings('ignore') def get_safe_action( self, diff --git 
a/omnisafe/envs/__init__.py b/omnisafe/envs/__init__.py index c21b1973c..fb1bf03b1 100644 --- a/omnisafe/envs/__init__.py +++ b/omnisafe/envs/__init__.py @@ -15,15 +15,14 @@ """Environment API for OmniSafe.""" from omnisafe.envs import classic_control -from omnisafe.envs.barrier_function_env import BarrierFunctionEnv +from omnisafe.envs.classic_control.envs_from_cbf import BarrierFunctionEnv +from omnisafe.envs.classic_control.envs_from_rcbf import RobustBarrierFunctionEnv from omnisafe.envs.core import CMDP, env_register, make, support_envs from omnisafe.envs.crabs_env import CRABSEnv from omnisafe.envs.custom_env import CustomEnv from omnisafe.envs.meta_drive_env import SafetyMetaDriveEnv from omnisafe.envs.barrier_function_env import BarrierFunctionEnv from omnisafe.envs.mujoco_env import MujocoEnv -from omnisafe.envs.robust_barrier_function_env import RobustBarrierFunctionEnv from omnisafe.envs.safety_gymnasium_env import SafetyGymnasiumEnv from omnisafe.envs.safety_gymnasium_modelbased import SafetyGymnasiumModelBased from omnisafe.envs.safety_isaac_gym_env import SafetyIsaacGymEnv -from omnisafe.envs.robust_barrier_function_env import RobustBarrierFunctionEnv diff --git a/omnisafe/envs/classic_control/__init__.py b/omnisafe/envs/classic_control/__init__.py index d899a41de..9d5a3ba99 100644 --- a/omnisafe/envs/classic_control/__init__.py +++ b/omnisafe/envs/classic_control/__init__.py @@ -13,4 +13,7 @@ # limitations under the License. 
# ============================================================================== """Environment implementations from papers.""" + from omnisafe.envs.classic_control import envs_from_crabs +from omnisafe.envs.classic_control.envs_from_cbf import BarrierFunctionEnv +from omnisafe.envs.classic_control.envs_from_rcbf import RobustBarrierFunctionEnv diff --git a/omnisafe/envs/barrier_function_env.py b/omnisafe/envs/classic_control/envs_from_cbf.py similarity index 95% rename from omnisafe/envs/barrier_function_env.py rename to omnisafe/envs/classic_control/envs_from_cbf.py index 01477c1fe..c46012b8d 100644 --- a/omnisafe/envs/barrier_function_env.py +++ b/omnisafe/envs/classic_control/envs_from_cbf.py @@ -14,6 +14,9 @@ # ============================================================================== """Interface of control barrier function-based environments.""" +# mypy: ignore-errors +# pylint: disable=all + from __future__ import annotations from typing import Any, ClassVar @@ -72,7 +75,11 @@ def __init__( super().__init__(env_id) self._env_id = env_id if num_envs == 1: - self._env = gymnasium.make(id=env_id, autoreset=False) + self._env = gymnasium.make( + id=env_id, + autoreset=False, + render_mode=kwargs.get('render_mode'), + ) self._env_specific_setting() assert isinstance(self._env.action_space, Box), 'Only support Box action space.' 
assert isinstance( @@ -103,7 +110,7 @@ def _env_specific_setting(self) -> None: high=self._env.unwrapped.max_torque, # type: ignore shape=(1,), ) - high = np.array([1.0, 1.0, self._env.unwrapped.max_speed]) # type: ignore + high = np.array([1.0, 1.0, self._env.unwrapped.max_speed], dtype=np.float32) # type: ignore self._env.unwrapped.observation_space = spaces.Box(low=-high, high=high) self._env.dt = 0.05 # type: ignore @@ -197,6 +204,11 @@ def reset( obs, info = self._env.reset(options=options) return torch.as_tensor(obs, dtype=torch.float32, device=self._device), info + @property + def max_episode_steps(self) -> int: + """The max steps per episode.""" + return self._env.spec.max_episode_steps + def set_seed(self, seed: int) -> None: """Set the seed for the environment. diff --git a/omnisafe/envs/robust_barrier_function_env.py b/omnisafe/envs/classic_control/envs_from_rcbf.py similarity index 51% rename from omnisafe/envs/robust_barrier_function_env.py rename to omnisafe/envs/classic_control/envs_from_rcbf.py index 9bce446ce..e97aaaea5 100644 --- a/omnisafe/envs/robust_barrier_function_env.py +++ b/omnisafe/envs/classic_control/envs_from_rcbf.py @@ -14,18 +14,202 @@ # ============================================================================== """Interface of control barrier function-based environments.""" +# mypy: ignore-errors +# pylint: disable=all + from __future__ import annotations -from typing import Any, ClassVar +from collections.abc import Iterable +from typing import Any, Callable, ClassVar +import gymnasium import numpy as np import torch +from gymnasium import spaces from omnisafe.envs.core import CMDP, env_register -from omnisafe.envs.unicycle_env import UnicycleEnv from omnisafe.typing import Box +def to_pixel(meas_cm: list[float] | float, shift: int = 0) -> float: + if isinstance(meas_cm, Iterable): + return 1.5 * 37.795 * meas_cm + np.array(shift) + + return 1.5 * 37.795 * meas_cm + shift + + +class UnicycleEnv(gymnasium.Env): + + def 
__init__(self) -> None: + + super().__init__() + + self.dynamics_mode = 'Unicycle' + self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,)) + self.safe_action_space = spaces.Box(low=-2.5, high=2.5, shape=(2,)) + self.observation_space = spaces.Box(low=-1e10, high=1e10, shape=(7,)) + self.bds = np.array([[-3.0, -3.0], [3.0, 3.0]]) + + self.dt = 0.02 + self.max_episode_steps = 1000 + self.reward_goal = 1.0 + self.goal_size = 0.3 + # Initialize Env + self.state = None + self.episode_step = 0 + self.initial_state = np.array( + [[-2.5, -2.5, 0.0], [-2.5, 2.5, 0.0], [-2.5, 0.0, 0.0], [2.5, -2.5, np.pi / 2]], + ) + self.goal_pos = np.array([2.5, 2.5]) + self.rand_init = False + + self.reset() + + # Get Dynamics + self.get_f, self.get_g = self._get_dynamics() + # Disturbance + self.disturb_mean = np.zeros((3,)) + self.disturb_covar = np.diag([0.005, 0.005, 0.05]) * 20 + + # Build Hazards + self.hazards = [] + + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([0.0, 0.0])}, + ) + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([-1.0, 1.0])}, + ) + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([-1.0, -1.0])}, + ) + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([1.0, -1.0])}, + ) + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([1.0, 1.0])}, + ) + + # Viewer + self.viewer = None + + def step( + self, + action: np.ndarray, + ) -> tuple[np.ndarray, float, float, bool, bool, dict[str, Any]]: + """Step the environment.""" + action = np.clip(action, -1.0, 1.0) + state, reward, cost, terminated, truncated, info = self._step(action) + return self.get_obs(), reward, cost, terminated, truncated, info + + def _step(self, action: np.ndarray) -> tuple: + """The details of step dynamics.""" + self.state += self.dt * (self.get_f(self.state) + self.get_g(self.state) @ action) + self.state -= self.dt * 
0.1 * self.get_g(self.state) @ np.array([np.cos(self.state[2]), 0]) + + self.episode_step += 1 + + dist_goal = self._goal_dist() + reward = self.last_goal_dist - dist_goal + self.last_goal_dist = dist_goal + terminated = False + if self.goal_met(): + reward += self.reward_goal + terminated = True + truncated = self.episode_step >= self.max_episode_steps + + cost = 0.0 + for hazard in self.hazards: + if hazard['type'] == 'circle': + cost += 0.1 * ( + np.sum((self.state[:2] - hazard['location']) ** 2) < hazard['radius'] ** 2 + ) + + return self.state, reward, cost, terminated, truncated, {} + + def goal_met(self) -> bool: + return np.linalg.norm(self.state[:2] - self.goal_pos) <= self.goal_size + + def reset(self, seed: int | None = None, options: dict | None = None) -> tuple: + self.episode_step = 0 + + if self.rand_init: + self.state = np.copy(self.initial_state[np.random.randint(self.initial_state.shape[0])]) + else: + self.state = np.copy(self.initial_state[0]) + + self.last_goal_dist = self._goal_dist() + + return self.get_obs(), {} + + def render(self, mode: str = 'human') -> np.ndarray: + """Get the image of the running environment.""" + raise NotImplementedError + + def get_obs(self) -> np.ndarray: + """Given the state, this function returns corresponding observation. + + Returns: + Observation: np.ndarray. 
+ """ + + rel_loc = self.goal_pos - self.state[:2] + goal_dist = np.linalg.norm(rel_loc) + goal_compass = self.obs_compass() # compass to the goal + + return np.array( + [ + self.state[0], + self.state[1], + np.cos(self.state[2]), + np.sin(self.state[2]), + goal_compass[0], + goal_compass[1], + np.exp(-goal_dist), + ], + ) + + def obs_compass(self) -> np.ndarray: + """Return a robot-centric compass observation of a list of positions.""" + + # Get ego vector in world frame + vec = self.goal_pos - self.state[:2] + # Rotate into frame + R = np.array( + [ + [np.cos(self.state[2]), -np.sin(self.state[2])], + [np.sin(self.state[2]), np.cos(self.state[2])], + ], + ) + vec = np.matmul(vec, R) + # Normalize + vec /= np.sqrt(np.sum(np.square(vec))) + 0.001 + return vec + + def _get_dynamics(self) -> tuple[Callable, Callable]: + + def get_f(state: np.ndarray) -> np.ndarray: + """Function to compute the drift dynamics 'f(x)' of the system.""" + return np.zeros(state.shape) + + def get_g(state: np.ndarray) -> np.ndarray: + """Function to compute the control dynamics 'g(x)' of the system.""" + theta = state[2] + return np.array([[np.cos(theta), 0], [np.sin(theta), 0], [0, 1.0]]) + + return get_f, get_g + + def _goal_dist(self) -> np.ndarray: + """Calculate the distance between the goal.""" + return np.linalg.norm(self.goal_pos - self.state[:2]) + + def close(self) -> None: + """Close the instance of environment.""" + if self.viewer: + self.viewer.close() + self.viewer = None + + @env_register class RobustBarrierFunctionEnv(CMDP): """Interface of control barrier function-based environments. 
diff --git a/omnisafe/envs/unicycle_env.py b/omnisafe/envs/unicycle_env.py deleted file mode 100644 index dd0515fba..000000000 --- a/omnisafe/envs/unicycle_env.py +++ /dev/null @@ -1,265 +0,0 @@ -# pylint: disable=all -# mypy: ignore-errors -from __future__ import annotations - -from collections.abc import Iterable -from typing import Any, Callable - -import gymnasium as gym -import numpy as np -from gymnasium import spaces - - -def to_pixel(meas_cm: list[float] | float, shift: int = 0) -> float: - if isinstance(meas_cm, Iterable): - return 1.5 * 37.795 * meas_cm + np.array(shift) - - return 1.5 * 37.795 * meas_cm + shift - - -class UnicycleEnv(gym.Env): - - def __init__(self) -> None: - - super().__init__() - - self.dynamics_mode = 'Unicycle' - self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,)) - self.safe_action_space = spaces.Box(low=-2.5, high=2.5, shape=(2,)) - self.observation_space = spaces.Box(low=-1e10, high=1e10, shape=(7,)) - self.bds = np.array([[-3.0, -3.0], [3.0, 3.0]]) - - self.dt = 0.02 - self.max_episode_steps = 1000 - self.reward_goal = 1.0 - self.goal_size = 0.3 - # Initialize Env - self.state = None - self.episode_step = 0 - self.initial_state = np.array( - [[-2.5, -2.5, 0.0], [-2.5, 2.5, 0.0], [-2.5, 0.0, 0.0], [2.5, -2.5, np.pi / 2]], - ) - self.goal_pos = np.array([2.5, 2.5]) - self.rand_init = False - - self.reset() - - # Get Dynamics - self.get_f, self.get_g = self._get_dynamics() - # Disturbance - self.disturb_mean = np.zeros((3,)) - self.disturb_covar = np.diag([0.005, 0.005, 0.05]) * 20 - - # Build Hazards - self.hazards = [] - - self.hazards.append( - {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([0.0, 0.0])}, - ) - self.hazards.append( - {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([-1.0, 1.0])}, - ) - self.hazards.append( - {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([-1.0, -1.0])}, - ) - self.hazards.append( - {'type': 'circle', 'radius': 0.6, 'location': 1.5 * 
np.array([1.0, -1.0])}, - ) - self.hazards.append( - {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([1.0, 1.0])}, - ) - - # Viewer - self.viewer = None - - def step( - self, - action: np.ndarray, - ) -> tuple[np.ndarray, float, float, bool, bool, dict[str, Any]]: - action = np.clip(action, -1.0, 1.0) - state, reward, cost, terminated, truncated, info = self._step(action) - return self.get_obs(), reward, cost, terminated, truncated, info - - def _step(self, action: np.ndarray) -> tuple: - self.state += self.dt * (self.get_f(self.state) + self.get_g(self.state) @ action) - self.state -= self.dt * 0.1 * self.get_g(self.state) @ np.array([np.cos(self.state[2]), 0]) - - self.episode_step += 1 - - dist_goal = self._goal_dist() - reward = self.last_goal_dist - dist_goal - self.last_goal_dist = dist_goal - terminated = False - if self.goal_met(): - reward += self.reward_goal - terminated = True - truncated = self.episode_step >= self.max_episode_steps - - cost = 0.0 - for hazard in self.hazards: - if hazard['type'] == 'circle': - cost += 0.1 * ( - np.sum((self.state[:2] - hazard['location']) ** 2) < hazard['radius'] ** 2 - ) - - return self.state, reward, cost, terminated, truncated, {} - - def goal_met(self) -> bool: - return np.linalg.norm(self.state[:2] - self.goal_pos) <= self.goal_size - - def reset(self, seed: int | None = None, options: dict | None = None) -> tuple: - self.episode_step = 0 - - if self.rand_init: - self.state = np.copy(self.initial_state[np.random.randint(self.initial_state.shape[0])]) - else: - self.state = np.copy(self.initial_state[0]) - - self.last_goal_dist = self._goal_dist() - - return self.get_obs(), {} - - def render(self, mode: str = 'human') -> np.ndarray: - if mode != 'human' and mode != 'rgb_array': - rel_loc = self.goal_pos - self.state[:2] - theta_error = np.arctan2(rel_loc[1], rel_loc[0]) - self.state[2] - print( - f'Ep_step = {self.episode_step}, \tState = {self.state}, \tDist2Goal = {self._goal_dist()}, alignment_error 
= {theta_error}', - ) - - screen_width = 600 - screen_height = 400 - - if self.viewer is None: - from envs import pyglet_rendering - - self.viewer = pyglet_rendering.Viewer(screen_width, screen_height) - # Draw obstacles - obstacles = [] - for i in range(len(self.hazards)): - if self.hazards[i]['type'] == 'circle': - obstacles.append( - pyglet_rendering.make_circle( - radius=to_pixel(self.hazards[i]['radius'], shift=0), - filled=True, - ), - ) - obs_trans = pyglet_rendering.Transform( - translation=( - to_pixel(self.hazards[i]['location'][0], shift=screen_width / 2), - to_pixel(self.hazards[i]['location'][1], shift=screen_height / 2), - ), - ) - obstacles[i].set_color(1.0, 0.0, 0.0) - obstacles[i].add_attr(obs_trans) - elif self.hazards[i]['type'] == 'polygon': - obstacles.append( - pyglet_rendering.make_polygon( - to_pixel( - self.hazards[i]['vertices'], - shift=[screen_width / 2, screen_height / 2], - ), - filled=True, - ), - ) - self.viewer.add_geom(obstacles[i]) - - # Make Goal - goal = pyglet_rendering.make_circle(radius=to_pixel(0.1, shift=0), filled=True) - goal_trans = pyglet_rendering.Transform( - translation=( - to_pixel(self.goal_pos[0], shift=screen_width / 2), - to_pixel(self.goal_pos[1], shift=screen_height / 2), - ), - ) - goal.add_attr(goal_trans) - goal.set_color(0.0, 0.5, 0.0) - self.viewer.add_geom(goal) - - # Make Robot - self.robot = pyglet_rendering.make_circle(radius=to_pixel(0.1), filled=True) - self.robot_trans = pyglet_rendering.Transform( - translation=( - to_pixel(self.state[0], shift=screen_width / 2), - to_pixel(self.state[1], shift=screen_height / 2), - ), - ) - self.robot_trans.set_rotation(self.state[2]) - self.robot.add_attr(self.robot_trans) - self.robot.set_color(0.5, 0.5, 0.8) - self.viewer.add_geom(self.robot) - self.robot_orientation = pyglet_rendering.Line(start=(0.0, 0.0), end=(15.0, 0.0)) - self.robot_orientation.linewidth.stroke = 2 - self.robot_orientation.add_attr(self.robot_trans) - self.robot_orientation.set_color(0, 
0, 0) - self.viewer.add_geom(self.robot_orientation) - - if self.state is None: - return None - - self.robot_trans.set_translation( - to_pixel(self.state[0], shift=screen_width / 2), - to_pixel(self.state[1], shift=screen_height / 2), - ) - self.robot_trans.set_rotation(self.state[2]) - - return self.viewer.render(return_rgb_array=mode == 'rgb_array') - - def get_obs(self) -> np.ndarray: - """Given the state, this function returns corresponding observation. - - Returns: - Observation: [pos_x, pos_y, cos(theta), sin(theta), xdir2goal, ydir2goal, exp(-dist2goal)] - """ - - rel_loc = self.goal_pos - self.state[:2] - goal_dist = np.linalg.norm(rel_loc) - goal_compass = self.obs_compass() # compass to the goal - - return np.array( - [ - self.state[0], - self.state[1], - np.cos(self.state[2]), - np.sin(self.state[2]), - goal_compass[0], - goal_compass[1], - np.exp(-goal_dist), - ], - ) - - def _get_dynamics(self) -> tuple[Callable, Callable]: - - def get_f(state: np.ndarray) -> np.ndarray: - """Function to compute the drift dynamics 'f(x)' of the system.""" - return np.zeros(state.shape) - - def get_g(state: np.ndarray) -> np.ndarray: - """Function to compute the control dynamics 'g(x)' of the system.""" - theta = state[2] - return np.array([[np.cos(theta), 0], [np.sin(theta), 0], [0, 1.0]]) - - return get_f, get_g - - def obs_compass(self) -> np.ndarray: - # Get ego vector in world frame - vec = self.goal_pos - self.state[:2] - # Rotate into frame - R = np.array( - [ - [np.cos(self.state[2]), -np.sin(self.state[2])], - [np.sin(self.state[2]), np.cos(self.state[2])], - ], - ) - vec = np.matmul(vec, R) - # Normalize - vec /= np.sqrt(np.sum(np.square(vec))) + 0.001 - return vec - - def _goal_dist(self) -> np.ndarray: - return np.linalg.norm(self.goal_pos - self.state[:2]) - - def close(self) -> None: - if self.viewer: - self.viewer.close() - self.viewer = None diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py index c94c38389..a1240a334 100644 --- 
a/omnisafe/evaluator.py +++ b/omnisafe/evaluator.py @@ -39,7 +39,6 @@ SafeARCPlanner, ) from omnisafe.common import Normalizer -<<<<<<< HEAD from omnisafe.common.control_barrier_function.crabs.models import ( AddGaussianNoise, CrabsCore, @@ -50,12 +49,10 @@ from omnisafe.common.control_barrier_function.crabs.optimizers import Barrier from omnisafe.common.control_barrier_function.crabs.utils import Normalizer as CRABSNormalizer from omnisafe.common.control_barrier_function.crabs.utils import create_model_and_trainer -======= from omnisafe.common.barrier_comp import BarrierCompensator from omnisafe.common.barrier_solver import PendulumSolver from omnisafe.common.robust_barrier_solver import CBFQPLayer from omnisafe.common.robust_gp_model import DynamicsModel ->>>>>>> wip from omnisafe.envs.core import CMDP, make from omnisafe.envs.wrapper import ActionRepeat, ActionScale, ObsNormalize, TimeLimit from omnisafe.models.actor import ActorBuilder @@ -648,6 +645,33 @@ def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branc ).reshape( -1, # to make sure the shape is (act_dim,) ) + if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': + approx_compensating_act = self._compensator(obs=obs) + compensated_act_mean_raw = act + approx_compensating_act + [f, g, x, std] = self._solver.get_gp_dynamics(obs, use_prev_model=False) + compensating_act = self._solver.control_barrier( + compensated_act_mean_raw, + f, + g, + x, + std, + ) + act = compensated_act_mean_raw + compensating_act + + if self._cfgs['algo'] == 'SACRCBF': + state_batch = self._dynamics_model.get_state(obs) + mean_pred_batch, sigma_pred_batch = ( + self._dynamics_model.predict_disturbance( + state_batch, + ) + ) + safe_act = self._solver.get_safe_action( + state_batch, + act, + mean_pred_batch, + sigma_pred_batch, + ) + act = safe_act elif self._planner is not None: act = self._planner.output_action( obs.unsqueeze(0).to('cpu'), diff --git a/tests/test_policy.py 
b/tests/test_policy.py index 79810d0b9..8492e2193 100644 --- a/tests/test_policy.py +++ b/tests/test_policy.py @@ -38,6 +38,8 @@ pid_lagrange_policy = ['TRPOPID', 'CPPOPID'] early_terminated_policy = ['TRPOEarlyTerminated', 'PPOEarlyTerminated'] offline_policy = ['BCQ', 'BCQLag', 'CRR', 'CCRR', 'VAEBC'] +cbf_policy = ['TRPOCBF', 'DDPGCBF', 'PPOBetaCBF'] +auto_alpha = [True, False] model_cfgs = { 'linear_lr_decay': True, @@ -52,6 +54,52 @@ optim_case = [0, 1, 2, 3, 4] +@helpers.parametrize(algo=cbf_policy) +def test_cbf(algo): + env_id = 'Pendulum-v1' + + custom_cfgs = { + 'train_cfgs': { + 'total_steps': 200, + 'vector_env_nums': 1, + 'torch_threads': 4, + }, + 'algo_cfgs': { + 'steps_per_epoch': 200, + }, + 'logger_cfgs': { + 'use_wandb': False, + 'save_model_freq': 1, + }, + } + agent = omnisafe.Agent(algo, env_id, custom_cfgs=custom_cfgs) + agent.learn() + + +@helpers.parametrize(auto_alpha=auto_alpha) +def test_rcbf(auto_alpha): + env_id = 'Unicycle' + + custom_cfgs = { + 'train_cfgs': { + 'total_steps': 1000, + 'vector_env_nums': 1, + 'torch_threads': 4, + }, + 'algo_cfgs': { + 'start_learning_steps': 998, + 'update_iters': 1, + 'auto_alpha': auto_alpha, + }, + 'logger_cfgs': { + 'use_wandb': False, + 'save_model_freq': 1, + }, + } + agent = omnisafe.Agent('SACRCBF', env_id, custom_cfgs=custom_cfgs) + agent.learn() + + @helpers.parametrize(optim_case=optim_case) def test_cpo(optim_case): agent = omnisafe.Agent('CPO', 'Test-v0', custom_cfgs={}) @@ -337,9 +385,6 @@ def test_off_lag_policy(algo): agent.learn() -auto_alpha = [True, False] - - @helpers.parametrize(auto_alpha=auto_alpha) def test_sac_policy(auto_alpha): """Test sac algorithms.""" From 259975af3dfac3b601c071c3d6a2c976dec3b3a0 Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Wed, 8 May 2024 23:13:40 +0800 Subject: [PATCH 05/18] chore: update pytest --- omnisafe/envs/__init__.py | 5 +- .../envs_from_cbf.py => cbf_env.py} | 0 omnisafe/envs/classic_control/__init__.py | 4 +- 
.../envs/classic_control/envs_from_rcbf.py | 168 +--------------- omnisafe/envs/rcbf_env.py | 187 ++++++++++++++++++ omnisafe/evaluator.py | 4 +- 6 files changed, 193 insertions(+), 175 deletions(-) rename omnisafe/envs/{classic_control/envs_from_cbf.py => cbf_env.py} (100%) create mode 100644 omnisafe/envs/rcbf_env.py diff --git a/omnisafe/envs/__init__.py b/omnisafe/envs/__init__.py index fb1bf03b1..095a1134c 100644 --- a/omnisafe/envs/__init__.py +++ b/omnisafe/envs/__init__.py @@ -15,14 +15,13 @@ """Environment API for OmniSafe.""" from omnisafe.envs import classic_control -from omnisafe.envs.classic_control.envs_from_cbf import BarrierFunctionEnv -from omnisafe.envs.classic_control.envs_from_rcbf import RobustBarrierFunctionEnv +from omnisafe.envs.cbf_env import BarrierFunctionEnv from omnisafe.envs.core import CMDP, env_register, make, support_envs from omnisafe.envs.crabs_env import CRABSEnv from omnisafe.envs.custom_env import CustomEnv from omnisafe.envs.meta_drive_env import SafetyMetaDriveEnv -from omnisafe.envs.barrier_function_env import BarrierFunctionEnv from omnisafe.envs.mujoco_env import MujocoEnv +from omnisafe.envs.rcbf_env import RobustBarrierFunctionEnv from omnisafe.envs.safety_gymnasium_env import SafetyGymnasiumEnv from omnisafe.envs.safety_gymnasium_modelbased import SafetyGymnasiumModelBased from omnisafe.envs.safety_isaac_gym_env import SafetyIsaacGymEnv diff --git a/omnisafe/envs/classic_control/envs_from_cbf.py b/omnisafe/envs/cbf_env.py similarity index 100% rename from omnisafe/envs/classic_control/envs_from_cbf.py rename to omnisafe/envs/cbf_env.py diff --git a/omnisafe/envs/classic_control/__init__.py b/omnisafe/envs/classic_control/__init__.py index 9d5a3ba99..9c8e7b35a 100644 --- a/omnisafe/envs/classic_control/__init__.py +++ b/omnisafe/envs/classic_control/__init__.py @@ -14,6 +14,4 @@ # ============================================================================== """Environment implementations from papers.""" -from 
omnisafe.envs.classic_control import envs_from_crabs -from omnisafe.envs.classic_control.envs_from_cbf import BarrierFunctionEnv -from omnisafe.envs.classic_control.envs_from_rcbf import RobustBarrierFunctionEnv +from omnisafe.envs.classic_control import envs_from_crabs, envs_from_rcbf diff --git a/omnisafe/envs/classic_control/envs_from_rcbf.py b/omnisafe/envs/classic_control/envs_from_rcbf.py index e97aaaea5..bdf469876 100644 --- a/omnisafe/envs/classic_control/envs_from_rcbf.py +++ b/omnisafe/envs/classic_control/envs_from_rcbf.py @@ -20,16 +20,12 @@ from __future__ import annotations from collections.abc import Iterable -from typing import Any, Callable, ClassVar +from typing import Any, Callable import gymnasium import numpy as np -import torch from gymnasium import spaces -from omnisafe.envs.core import CMDP, env_register -from omnisafe.typing import Box - def to_pixel(meas_cm: list[float] | float, shift: int = 0) -> float: if isinstance(meas_cm, Iterable): @@ -171,10 +167,7 @@ def get_obs(self) -> np.ndarray: def obs_compass(self) -> np.ndarray: """Return a robot-centric compass observation of a list of positions.""" - - # Get ego vector in world frame vec = self.goal_pos - self.state[:2] - # Rotate into frame R = np.array( [ [np.cos(self.state[2]), -np.sin(self.state[2])], @@ -182,7 +175,6 @@ def obs_compass(self) -> np.ndarray: ], ) vec = np.matmul(vec, R) - # Normalize vec /= np.sqrt(np.sum(np.square(vec))) + 0.001 return vec @@ -208,161 +200,3 @@ def close(self) -> None: if self.viewer: self.viewer.close() self.viewer = None - - -@env_register -class RobustBarrierFunctionEnv(CMDP): - """Interface of control barrier function-based environments. - - .. warning:: - Since environments based on control barrier functions require special judgment and control - of environmental dynamics, they do not support the use of vectorized environments for - parallelization. - - Attributes: - need_auto_reset_wrapper (bool): Whether to use auto reset wrapper. 
- need_time_limit_wrapper (bool): Whether to use time limit wrapper. - """ - - need_auto_reset_wrapper = True - need_time_limit_wrapper = False - _support_envs: ClassVar[list[str]] = [ - 'Unicycle', - ] - - def __init__( - self, - env_id: str, - num_envs: int = 1, - device: str = 'cpu', - **kwargs: Any, - ) -> None: - """Initialize the environment. - - Args: - env_id (str): Environment id. - num_envs (int, optional): Number of environments. Defaults to 1. - device (torch.device, optional): Device to store the data. Defaults to 'cpu'. - - Keyword Args: - render_mode (str, optional): The render mode, ranging from ``human``, ``rgb_array``, ``rgb_array_list``. - Defaults to ``rgb_array``. - camera_name (str, optional): The camera name. - camera_id (int, optional): The camera id. - width (int, optional): The width of the rendered image. Defaults to 256. - height (int, optional): The height of the rendered image. Defaults to 256. - """ - super().__init__(env_id) - self._env_id = env_id - if num_envs == 1: - if self._env_id == 'Unicycle': - self._env = UnicycleEnv() - else: - raise NotImplementedError('Only support Unicycle now.') - assert isinstance(self._env.action_space, Box), 'Only support Box action space.' - assert isinstance( - self._env.observation_space, - Box, - ), 'Only support Box observation space.' - self._action_space = self._env.action_space - self._observation_space = self._env.observation_space - else: - raise NotImplementedError('Only support num_envs=1 now.') - self._device = torch.device(device) - - self._num_envs = num_envs - self._metadata = self._env.metadata - - def step( - self, - action: torch.Tensor, - ) -> tuple[ - torch.Tensor, - torch.Tensor, - torch.Tensor, - torch.Tensor, - torch.Tensor, - dict[str, Any], - ]: - """Step the environment. - - .. note:: - - OmniSafe use auto reset wrapper to reset the environment when the episode is - terminated. So the ``obs`` will be the first observation of the next episode. 
- And the true ``final_observation`` in ``info`` will be stored in the ``final_observation`` key of ``info``. - - Args: - action (torch.Tensor): Action to take. - - Returns: - observation: Agent's observation of the current environment. - reward: Amount of reward returned after previous action. - cost: Amount of cost returned after previous action. - terminated: Whether the episode has ended. - truncated: Whether the episode has been truncated due to a time limit. - info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). - """ - obs, reward, cost, terminated, truncated, info = self._env.step( - action.detach().cpu().numpy(), - ) - obs, reward, cost, terminated, truncated = ( - torch.as_tensor(x, dtype=torch.float32, device=self._device) - for x in (obs, reward, cost, terminated, truncated) - ) - if 'final_observation' in info: - info['final_observation'] = np.array( - [ - array if array is not None else np.zeros(obs.shape[-1]) - for array in info['final_observation'] - ], - ) - info['final_observation'] = torch.as_tensor( - info['final_observation'], - dtype=torch.float32, - device=self._device, - ) - - return obs, reward, cost, terminated, truncated, info - - def reset( - self, - seed: int | None = None, - options: dict[str, Any] | None = None, - ) -> tuple[torch.Tensor, dict]: - """Reset the environment. - - Args: - seed (int, optional): The random seed. Defaults to None. - options (dict[str, Any], optional): The options for the environment. Defaults to None. - - Returns: - observation: Agent's observation of the current environment. - info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). - """ - obs, info = self._env.reset(seed=seed, options=options) - return torch.as_tensor(obs, dtype=torch.float32, device=self._device), info - - def set_seed(self, seed: int) -> None: - """Set the seed for the environment. - - Args: - seed (int): Seed to set. 
- """ - self.reset(seed=seed) - - def render(self) -> Any: - """Render the environment. - - Returns: - Rendered environment. - """ - return self._env.render() - - def close(self) -> None: - """Close the environment.""" - self._env.close() - - def __getattr__(self, name: str) -> Any: - """Return the unwrapped environment attributes.""" - return getattr(self._env, name) diff --git a/omnisafe/envs/rcbf_env.py b/omnisafe/envs/rcbf_env.py new file mode 100644 index 000000000..f97586dc3 --- /dev/null +++ b/omnisafe/envs/rcbf_env.py @@ -0,0 +1,187 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Interface of control barrier function-based environments.""" + +# mypy: ignore-errors +# pylint: disable=all + +from __future__ import annotations + +from typing import Any, ClassVar + +import numpy as np +import torch + +from omnisafe.envs.classic_control.envs_from_rcbf import UnicycleEnv +from omnisafe.envs.core import CMDP, env_register +from omnisafe.typing import Box + + +@env_register +class RobustBarrierFunctionEnv(CMDP): + """Interface of control barrier function-based environments. + + .. warning:: + Since environments based on control barrier functions require special judgment and control + of environmental dynamics, they do not support the use of vectorized environments for + parallelization. 
+ + Attributes: + need_auto_reset_wrapper (bool): Whether to use auto reset wrapper. + need_time_limit_wrapper (bool): Whether to use time limit wrapper. + """ + + need_auto_reset_wrapper = True + need_time_limit_wrapper = False + _support_envs: ClassVar[list[str]] = [ + 'Unicycle', + ] + + def __init__( + self, + env_id: str, + num_envs: int = 1, + device: str = 'cpu', + **kwargs: Any, + ) -> None: + """Initialize the environment. + + Args: + env_id (str): Environment id. + num_envs (int, optional): Number of environments. Defaults to 1. + device (torch.device, optional): Device to store the data. Defaults to 'cpu'. + + Keyword Args: + render_mode (str, optional): The render mode, ranging from ``human``, ``rgb_array``, ``rgb_array_list``. + Defaults to ``rgb_array``. + camera_name (str, optional): The camera name. + camera_id (int, optional): The camera id. + width (int, optional): The width of the rendered image. Defaults to 256. + height (int, optional): The height of the rendered image. Defaults to 256. + """ + super().__init__(env_id) + self._env_id = env_id + if num_envs == 1: + if self._env_id == 'Unicycle': + self._env = UnicycleEnv() + else: + raise NotImplementedError('Only support Unicycle now.') + assert isinstance(self._env.action_space, Box), 'Only support Box action space.' + assert isinstance( + self._env.observation_space, + Box, + ), 'Only support Box observation space.' + self._action_space = self._env.action_space + self._observation_space = self._env.observation_space + else: + raise NotImplementedError('Only support num_envs=1 now.') + self._device = torch.device(device) + + self._num_envs = num_envs + self._metadata = self._env.metadata + + def step( + self, + action: torch.Tensor, + ) -> tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + dict[str, Any], + ]: + """Step the environment. + + .. note:: + + OmniSafe use auto reset wrapper to reset the environment when the episode is + terminated. 
So the ``obs`` will be the first observation of the next episode. + And the true ``final_observation`` in ``info`` will be stored in the ``final_observation`` key of ``info``. + + Args: + action (torch.Tensor): Action to take. + + Returns: + observation: Agent's observation of the current environment. + reward: Amount of reward returned after previous action. + cost: Amount of cost returned after previous action. + terminated: Whether the episode has ended. + truncated: Whether the episode has been truncated due to a time limit. + info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). + """ + obs, reward, cost, terminated, truncated, info = self._env.step( + action.detach().cpu().numpy(), + ) + obs, reward, cost, terminated, truncated = ( + torch.as_tensor(x, dtype=torch.float32, device=self._device) + for x in (obs, reward, cost, terminated, truncated) + ) + if 'final_observation' in info: + info['final_observation'] = np.array( + [ + array if array is not None else np.zeros(obs.shape[-1]) + for array in info['final_observation'] + ], + ) + info['final_observation'] = torch.as_tensor( + info['final_observation'], + dtype=torch.float32, + device=self._device, + ) + + return obs, reward, cost, terminated, truncated, info + + def reset( + self, + seed: int | None = None, + options: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, dict]: + """Reset the environment. + + Args: + seed (int, optional): The random seed. Defaults to None. + options (dict[str, Any], optional): The options for the environment. Defaults to None. + + Returns: + observation: Agent's observation of the current environment. + info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). + """ + obs, info = self._env.reset(seed=seed, options=options) + return torch.as_tensor(obs, dtype=torch.float32, device=self._device), info + + def set_seed(self, seed: int) -> None: + """Set the seed for the environment. 
+ + Args: + seed (int): Seed to set. + """ + self.reset(seed=seed) + + def render(self) -> Any: + """Render the environment. + + Returns: + Rendered environment. + """ + return self._env.render() + + def close(self) -> None: + """Close the environment.""" + self._env.close() + + def __getattr__(self, name: str) -> Any: + """Return the unwrapped environment attributes.""" + return getattr(self._env, name) diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py index a1240a334..90535d931 100644 --- a/omnisafe/evaluator.py +++ b/omnisafe/evaluator.py @@ -39,6 +39,8 @@ SafeARCPlanner, ) from omnisafe.common import Normalizer +from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.common.barrier_solver import PendulumSolver from omnisafe.common.control_barrier_function.crabs.models import ( AddGaussianNoise, CrabsCore, @@ -49,8 +51,6 @@ from omnisafe.common.control_barrier_function.crabs.optimizers import Barrier from omnisafe.common.control_barrier_function.crabs.utils import Normalizer as CRABSNormalizer from omnisafe.common.control_barrier_function.crabs.utils import create_model_and_trainer -from omnisafe.common.barrier_comp import BarrierCompensator -from omnisafe.common.barrier_solver import PendulumSolver from omnisafe.common.robust_barrier_solver import CBFQPLayer from omnisafe.common.robust_gp_model import DynamicsModel from omnisafe.envs.core import CMDP, make From 08e926c1f7bb3a6c0098abbbcee9799c28264955 Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Wed, 8 May 2024 23:44:04 +0800 Subject: [PATCH 06/18] chore: update pytest --- .pre-commit-config.yaml | 2 +- omnisafe/adapter/barrier_function_adapter.py | 12 ++++++------ .../adapter/beta_barrier_function_adapter.py | 16 ++++++---------- .../offpolicy_barrier_function_adapter.py | 2 +- .../adapter/robust_barrier_function_adapter.py | 4 ++-- omnisafe/algorithms/off_policy/sac_rcbf.py | 2 +- .../on_policy/barrier_function/ppo_cbf.py | 4 ---- omnisafe/envs/cbf_env.py | 2 +- 
omnisafe/envs/classic_control/envs_from_rcbf.py | 16 +++++----------- omnisafe/envs/rcbf_env.py | 2 +- 10 files changed, 24 insertions(+), 38 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 63f378224..4b40fedd1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -114,7 +114,7 @@ repos: ^tests/| ^setup.py$| ^omnisafe/envs/classic_control/envs_from_crabs.py$| - ^omnisafe/envs/classic_control/envs_from_rcbf.py| + ^omnisafe/envs/classic_control/envs_from_rcbf.py$| ^omnisafe/common/control_barrier_function/crabs/models.py$| ^omnisafe/common/control_barrier_function/crabs/optimizers.py$| ^omnisafe/common/control_barrier_function/crabs/utils.py$| diff --git a/omnisafe/adapter/barrier_function_adapter.py b/omnisafe/adapter/barrier_function_adapter.py index 469f4e7cd..80b45eecf 100644 --- a/omnisafe/adapter/barrier_function_adapter.py +++ b/omnisafe/adapter/barrier_function_adapter.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""BarrierFunction Adapter for OmniSafe.""" +"""Barrier Function Adapter for OmniSafe.""" from __future__ import annotations @@ -31,11 +31,11 @@ class BarrierFunctionAdapter(OnPolicyAdapter): - """BarrierFunction Adapter for OmniSafe. + """Barrier Function Adapter for OmniSafe. - The BarrierFunction Adapter is used to establish the logic of interaction between agents and the - environment based on control barrier functions. Its key feature is the introduction of action - compensators and barrier function solvers. 
+ The Barrier Function Adapter is used to establish the logic of interaction between agents and + the environment based on control barrier functions. Its key feature is the introduction of + action compensators and barrier function solvers. Args: env_id (str): The environment id. diff --git a/omnisafe/adapter/beta_barrier_function_adapter.py b/omnisafe/adapter/beta_barrier_function_adapter.py index d5738e02d..f0bc50af8 100644 --- a/omnisafe/adapter/beta_barrier_function_adapter.py +++ b/omnisafe/adapter/beta_barrier_function_adapter.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -120,11 +120,7 @@ def vectorized_f_(obs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: class BetaBarrierFunctionAdapter(OnPolicyAdapter): - """BarrierFunction Adapter for OmniSafe. - - The BarrierFunction Adapter is used to establish the logic of interaction between agents and the - environment based on control barrier functions. Its key feature is the introduction of action - compensators and barrier function solvers. + """Barrier Function Adapter with Beta Distribution for OmniSafe. Args: env_id (str): The environment id. @@ -134,7 +130,7 @@ class BetaBarrierFunctionAdapter(OnPolicyAdapter): """ def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: - """Initialize an instance of :class:`BarrierFunctionAdapter`.""" + """Initialize an instance of :class:`BetaBarrierFunctionAdapte`.""" super().__init__(env_id, num_envs, seed, cfgs) self.constraint_fn: Callable = vectorize_f(cbf) @@ -147,9 +143,9 @@ def _wrapper( """Wrapper the environment. .. 
warning:: - Since solving the optimization problem requires obtaining physical quantities with practical - significance from state observations, the Barrier Function Adapter does not support - normalization of observations. + Since solving the optimization problem requires obtaining physical quantities with + practical significance from state observations, the Beta Barrier Function Adapter does + not support normalization of observations. Args: obs_normalize (bool, optional): Whether to normalize the observation. Defaults to False. diff --git a/omnisafe/adapter/offpolicy_barrier_function_adapter.py b/omnisafe/adapter/offpolicy_barrier_function_adapter.py index f40a7add9..e6bff40d6 100644 --- a/omnisafe/adapter/offpolicy_barrier_function_adapter.py +++ b/omnisafe/adapter/offpolicy_barrier_function_adapter.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""BarrierFunction OffPolicy Adapter for OmniSafe.""" +"""OffPolicy Barrier Function Adapter for OmniSafe.""" from __future__ import annotations diff --git a/omnisafe/adapter/robust_barrier_function_adapter.py b/omnisafe/adapter/robust_barrier_function_adapter.py index 8da2cf658..a7c6dc394 100644 --- a/omnisafe/adapter/robust_barrier_function_adapter.py +++ b/omnisafe/adapter/robust_barrier_function_adapter.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""BarrierFunction Adapter for OmniSafe.""" +"""Robust Barrier Function Adapter for OmniSafe.""" from __future__ import annotations diff --git a/omnisafe/algorithms/off_policy/sac_rcbf.py b/omnisafe/algorithms/off_policy/sac_rcbf.py index 1e9547369..e1a351020 100644 --- a/omnisafe/algorithms/off_policy/sac_rcbf.py +++ b/omnisafe/algorithms/off_policy/sac_rcbf.py @@ -37,7 +37,7 @@ class SACRCBF(SAC): """The Soft Actor-Critic algorithm with Robust Control Barrier Function. References: - - Title: Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor + - Title: The Soft Actor-Critic algorithm with Robust Control Barrier Function - Authors: Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, Sergey Levine. - URL: `SAC `_ """ diff --git a/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py b/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py index b77c36c76..1b46857e6 100644 --- a/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py +++ b/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py @@ -35,10 +35,6 @@ class PPOBetaCBF(PPO): - URL: `PPOBetaCBF `_ """ - def _init_log(self) -> None: - super()._init_log() - self._logger.register_key('Value/Loss_compensator') - def _init_env(self) -> None: self._env: BetaBarrierFunctionAdapter = BetaBarrierFunctionAdapter( self._env_id, diff --git a/omnisafe/envs/cbf_env.py b/omnisafe/envs/cbf_env.py index c46012b8d..a46e91c94 100644 --- a/omnisafe/envs/cbf_env.py +++ b/omnisafe/envs/cbf_env.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/omnisafe/envs/classic_control/envs_from_rcbf.py b/omnisafe/envs/classic_control/envs_from_rcbf.py index bdf469876..33e13189c 100644 --- a/omnisafe/envs/classic_control/envs_from_rcbf.py +++ b/omnisafe/envs/classic_control/envs_from_rcbf.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,7 +19,6 @@ from __future__ import annotations -from collections.abc import Iterable from typing import Any, Callable import gymnasium @@ -27,17 +26,11 @@ from gymnasium import spaces -def to_pixel(meas_cm: list[float] | float, shift: int = 0) -> float: - if isinstance(meas_cm, Iterable): - return 1.5 * 37.795 * meas_cm + np.array(shift) - - return 1.5 * 37.795 * meas_cm + shift - - class UnicycleEnv(gymnasium.Env): + """Environment from `The Soft Actor-Critic algorithm with Robust Control Barrier Function`.""" def __init__(self) -> None: - + """Initialize the unicycle environment.""" super().__init__() self.dynamics_mode = 'Unicycle' @@ -124,9 +117,11 @@ def _step(self, action: np.ndarray) -> tuple: return self.state, reward, cost, terminated, truncated, {} def goal_met(self) -> bool: + """Return whether meeting the goal.""" return np.linalg.norm(self.state[:2] - self.goal_pos) <= self.goal_size def reset(self, seed: int | None = None, options: dict | None = None) -> tuple: + """Reset the environment.""" self.episode_step = 0 if self.rand_init: @@ -148,7 +143,6 @@ def get_obs(self) -> np.ndarray: Returns: Observation: np.ndarray. 
""" - rel_loc = self.goal_pos - self.state[:2] goal_dist = np.linalg.norm(rel_loc) goal_compass = self.obs_compass() # compass to the goal diff --git a/omnisafe/envs/rcbf_env.py b/omnisafe/envs/rcbf_env.py index f97586dc3..a8fbdcd28 100644 --- a/omnisafe/envs/rcbf_env.py +++ b/omnisafe/envs/rcbf_env.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 6a18071c2746ab8e92148c81ebf510638d7127de Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Thu, 9 May 2024 18:09:01 +0800 Subject: [PATCH 07/18] chore: update pytest --- omnisafe/adapter/barrier_function_adapter.py | 6 +- .../adapter/beta_barrier_function_adapter.py | 24 ++----- omnisafe/algorithms/off_policy/ddpg.py | 1 + .../on_policy/base/policy_gradient.py | 1 + omnisafe/common/barrier_solver.py | 2 +- omnisafe/common/robust_gp_model.py | 67 +++++-------------- omnisafe/models/actor/actor_builder.py | 11 +-- tests/test_policy.py | 1 + 8 files changed, 37 insertions(+), 76 deletions(-) diff --git a/omnisafe/adapter/barrier_function_adapter.py b/omnisafe/adapter/barrier_function_adapter.py index 80b45eecf..c5581400a 100644 --- a/omnisafe/adapter/barrier_function_adapter.py +++ b/omnisafe/adapter/barrier_function_adapter.py @@ -60,9 +60,9 @@ def _wrapper( """Wrapper the environment. .. warning:: - Since solving the optimization problem requires obtaining physical quantities with practical - significance from state observations, the Barrier Function Adapter does not support - normalization of observations. + Since solving the optimization problem requires obtaining physical quantities with + practical significance from state observations, the Barrier Function Adapter does not + support normalization of observations. Args: obs_normalize (bool, optional): Whether to normalize the observation. 
Defaults to False. diff --git a/omnisafe/adapter/beta_barrier_function_adapter.py b/omnisafe/adapter/beta_barrier_function_adapter.py index f0bc50af8..22bab63ff 100644 --- a/omnisafe/adapter/beta_barrier_function_adapter.py +++ b/omnisafe/adapter/beta_barrier_function_adapter.py @@ -98,18 +98,11 @@ def vectorized_f_(obs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: """ obs = obs.cpu().detach().numpy() - if len(obs.shape) == 1: - batch_size = 1 - lbs, ubs = f(obs) - lbs = torch.as_tensor(lbs) - ubs = torch.as_tensor(ubs) - - else: - batch_size = obs.shape[0] - lbs = torch.zeros([batch_size, 1]) - ubs = torch.zeros([batch_size, 1]) - for i in range(batch_size): - lbs[i], ubs[i] = f(obs[i]) + batch_size = obs.shape[0] + lbs = torch.zeros([batch_size, 1]) + ubs = torch.zeros([batch_size, 1]) + for i in range(batch_size): + lbs[i], ubs[i] = f(obs[i]) lbs = torch.FloatTensor(lbs).reshape(batch_size, 1) ubs = torch.FloatTensor(ubs).reshape(batch_size, 1) @@ -181,10 +174,6 @@ def rollout( # pylint: disable=too-many-locals ) -> None: """Rollout the environment and store the data in the buffer. - .. warning:: - As OmniSafe uses :class:`AutoReset` wrapper, the environment will be reset automatically, - so the final observation will be stored in ``info['final_observation']``. - Args: steps_per_epoch (int): Number of steps per epoch. 
agent (ConstraintActorCritic): Constraint actor-critic, including actor , reward critic @@ -206,9 +195,6 @@ def rollout( # pylint: disable=too-many-locals next_obs, reward, cost, terminated, truncated, info = self.step(final_act) self._log_value(reward=reward, cost=cost, info=info) - - if self._cfgs.algo_cfgs.use_cost: - logger.store({'Value/cost': value_c}) logger.store({'Value/reward': value_r}) buffer.store( diff --git a/omnisafe/algorithms/off_policy/ddpg.py b/omnisafe/algorithms/off_policy/ddpg.py index 2d6bad948..0ce31f286 100644 --- a/omnisafe/algorithms/off_policy/ddpg.py +++ b/omnisafe/algorithms/off_policy/ddpg.py @@ -190,6 +190,7 @@ def _init_log(self) -> None: self._log_what_to_save() self._logger.torch_save() + self._specific_save() self._logger.register_key( 'Metrics/EpRet', diff --git a/omnisafe/algorithms/on_policy/base/policy_gradient.py b/omnisafe/algorithms/on_policy/base/policy_gradient.py index 831076de6..826ff7c1a 100644 --- a/omnisafe/algorithms/on_policy/base/policy_gradient.py +++ b/omnisafe/algorithms/on_policy/base/policy_gradient.py @@ -182,6 +182,7 @@ def _init_log(self) -> None: self._log_what_to_save() self._logger.torch_save() + self._specific_save() self._logger.register_key( 'Metrics/EpRet', diff --git a/omnisafe/common/barrier_solver.py b/omnisafe/common/barrier_solver.py index ea287b4ad..e4471fb38 100644 --- a/omnisafe/common/barrier_solver.py +++ b/omnisafe/common/barrier_solver.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/omnisafe/common/robust_gp_model.py b/omnisafe/common/robust_gp_model.py index 885a50389..62679fd59 100644 --- a/omnisafe/common/robust_gp_model.py +++ b/omnisafe/common/robust_gp_model.py @@ -133,7 +133,7 @@ def __init__( self.model = self.model.to(self.device) warnings.filterwarnings('ignore') - def train(self, training_iter: int, verbose: bool = False) -> None: + def train(self, training_iter: int) -> None: """Trains the Gaussian Process model. Args: @@ -145,17 +145,11 @@ def train(self, training_iter: int, verbose: bool = False) -> None: optimizer = torch.optim.Adam(self.model.parameters(), lr=0.1) mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model) - for i in range(training_iter): + for _ in range(training_iter): optimizer.zero_grad() output = self.model(self._train_x) loss = -mll(output, self._train_y) loss.backward() - if verbose: - print( - f'\tIter {i + 1}/{training_iter} - Loss: {loss.item():.3f} lengthscale: ' - f'{self.model.covar_module.base_kernel.lengthscale.item():.3f} noise: ' - f'{self.likelihood.noise.item():.3f}', - ) optimizer.step() def predict(self, test_x: torch.Tensor) -> dict[str, torch.Tensor | np.ndarray]: @@ -216,45 +210,27 @@ def __init__( self.n_s = DYNAMICS_MODE[self.env.dynamics_mode]['n_s'] self.n_u = DYNAMICS_MODE[self.env.dynamics_mode]['n_u'] - self._disturb_estimators = None self.disturbance_history = {} self.history_counter = 0 self.max_history_count = gp_model_size self.disturbance_history['state'] = np.zeros((self.max_history_count, self.n_s)) self.disturbance_history['disturbance'] = np.zeros((self.max_history_count, self.n_s)) - self._train_x = None - self._train_y = None - - self.l_p = l_p + self._train_x = np.zeros((self.max_history_count, self.n_s)) + self._train_y = np.zeros((self.max_history_count, self.n_s)) + self._disturb_estimators = [] self.device = torch.device(device) - def predict_next_state(self, state_batch: np.ndarray, u_batch: np.ndarray) -> np.ndarray: - """Predicts the 
next state given the current state and action batch. - - Args: - state_batch (np.ndarray): The batch of current states. - u_batch (np.ndarray): The batch of actions applied. - - Returns: - np.ndarray: The batch of predicted next states. - """ - expand_dims = len(state_batch.shape) == 1 - if expand_dims: - state_batch = np.expand_dims(state_batch, axis=0) - - next_state_batch = state_batch + self.env.dt * ( - self.get_f(state_batch) - + (self.get_g(state_batch) @ np.expand_dims(u_batch, -1)).squeeze(-1) - ) - pred_mean, pred_std = self.predict_disturbance(state_batch) - next_state_batch += self.env.dt * pred_mean - - if expand_dims: - next_state_batch = next_state_batch.squeeze(0) - if pred_std is not None: - pred_std = pred_std.squeeze(0) - - return next_state_batch + for i in range(self.n_s): + self._disturb_estimators.append( + GPyDisturbanceEstimator( + np.zeros((self.max_history_count, self.n_s)), + np.zeros((self.max_history_count, self.n_s)), + MAX_STD[self.env.dynamics_mode][i], + device=self.device, + ), + ) + self._disturb_initialized = True + self.l_p = l_p def get_dynamics(self) -> tuple[Callable, Callable]: """Retrieves the dynamics functions for drift and control based on the environment's dynamics mode. @@ -324,13 +300,6 @@ def append_transition( u_batch (np.ndarray): The batch of actions applied, shape (n_u,) or (batch_size, n_u). next_state_batch (np.ndarray): The batch of next states, shape (n_s,) or (batch_size, n_s). 
""" - expand_dims = len(state_batch.shape) == 1 - - if expand_dims: - state_batch = np.expand_dims(state_batch, 0) - next_state_batch = np.expand_dims(next_state_batch, 0) - u_batch = np.expand_dims(u_batch, 0) - u_batch = np.expand_dims(u_batch, -1) disturbance_batch = ( next_state_batch @@ -380,7 +349,7 @@ def fit_gp_model(self, training_iter: int = 70) -> None: ), ) self._disturb_estimators[i].train(training_iter) - + self._disturb_initialized = False self._train_x = train_x self._train_y = train_y @@ -404,7 +373,7 @@ def predict_disturbance(self, test_x: torch.Tensor) -> tuple[torch.Tensor, torch means = np.zeros(test_x.shape) f_std = np.zeros(test_x.shape) - if self._disturb_estimators: + if not self._disturb_initialized: train_x_std = np.std(self._train_x, axis=0) train_y_std = np.std(self._train_y, axis=0) test_x = test_x / train_x_std diff --git a/omnisafe/models/actor/actor_builder.py b/omnisafe/models/actor/actor_builder.py index 75358134c..3f0b3e4a6 100644 --- a/omnisafe/models/actor/actor_builder.py +++ b/omnisafe/models/actor/actor_builder.py @@ -61,10 +61,13 @@ def build_actor( ) -> Actor: """Build actor network. - Currently, we support the following actor types: - - ``gaussian_learning``: Gaussian actor with learnable standard deviation parameters. - - ``gaussian_sac``: Gaussian actor with learnable standard deviation network. - - ``mlp``: Multi-layer perceptron actor, used in ``DDPG`` and ``TD3``. + This method supports multiple actor types, each corresponding to a different class: + - `gaussian_learning`: Returns a GaussianLearningActor with learnable std deviation parameters. + - `gaussian_sac`: Returns a GaussianSACActor with a learnable std deviation network. + - `mlp`: Returns an MLPActor, commonly used in DDPG and TD3 algorithms. + - `vae`: Returns a Variational Autoencoder (VAE) actor. + - `perturbation`: Returns a PerturbationActor. + - `beta`: Returns a BetaLearningActor. Args: actor_type (ActorType): Type of actor network, e.g. 
``gaussian_learning``. diff --git a/tests/test_policy.py b/tests/test_policy.py index 8492e2193..21ed70782 100644 --- a/tests/test_policy.py +++ b/tests/test_policy.py @@ -98,6 +98,7 @@ def test_rcbf(auto_alpha): } agent = omnisafe.Agent('SACRCBF', env_id, custom_cfgs=custom_cfgs) agent.learn() + agent.evaluate(num_episodes=1) @helpers.parametrize(optim_case=optim_case) From 38b0a5c60c09f6d0f84e59bddb2177227656a420 Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Tue, 14 May 2024 15:07:20 +0800 Subject: [PATCH 08/18] style: fix comment --- .pre-commit-config.yaml | 3 +- .../adapter/beta_barrier_function_adapter.py | 2 +- .../robust_barrier_function_adapter.py | 4 +- omnisafe/algorithms/off_policy/sac_rcbf.py | 1 - omnisafe/algorithms/on_policy/base/ppo.py | 56 +++++++++++++++++++ omnisafe/common/barrier_comp.py | 1 - omnisafe/common/barrier_solver.py | 52 +++++++++-------- omnisafe/common/robust_barrier_solver.py | 17 +++--- omnisafe/common/robust_gp_model.py | 36 ++++++------ omnisafe/configs/off-policy/DDPGCBF.yaml | 1 - omnisafe/configs/off-policy/SACRCBF.yaml | 46 +++++++-------- omnisafe/configs/on-policy/IPO.yaml | 44 ++++++++++++--- omnisafe/configs/on-policy/TRPOCBF.yaml | 1 - .../envs/classic_control/envs_from_rcbf.py | 9 +-- omnisafe/envs/rcbf_env.py | 18 +----- omnisafe/evaluator.py | 1 - omnisafe/utils/tools.py | 14 ++--- 17 files changed, 181 insertions(+), 125 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4b40fedd1..96e584f57 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -118,6 +118,5 @@ repos: ^omnisafe/common/control_barrier_function/crabs/models.py$| ^omnisafe/common/control_barrier_function/crabs/optimizers.py$| ^omnisafe/common/control_barrier_function/crabs/utils.py$| - ^conftest.py$| - ^setup.py$ + ^conftest.py$ ) diff --git a/omnisafe/adapter/beta_barrier_function_adapter.py b/omnisafe/adapter/beta_barrier_function_adapter.py index 22bab63ff..9364b5282 100644 --- 
a/omnisafe/adapter/beta_barrier_function_adapter.py +++ b/omnisafe/adapter/beta_barrier_function_adapter.py @@ -30,7 +30,7 @@ from omnisafe.utils.config import Config -# # pylint: disable-next=too-many-locals +# pylint: disable-next=too-many-locals def cbf(state: np.ndarray, eta: float = 0.99) -> tuple[np.ndarray, np.ndarray]: """Calculates the Control Barrier Function (CBF) constraints. diff --git a/omnisafe/adapter/robust_barrier_function_adapter.py b/omnisafe/adapter/robust_barrier_function_adapter.py index a7c6dc394..ade39d12f 100644 --- a/omnisafe/adapter/robust_barrier_function_adapter.py +++ b/omnisafe/adapter/robust_barrier_function_adapter.py @@ -32,7 +32,7 @@ class RobustBarrierFunctionAdapter(OffPolicyAdapter): - """Off Policy Robust Barrier Function Adapter for OmniSafe. + """Robust Barrier Function Adapter for OmniSafe. :class:`RobustBarrierFunctionAdapter` is used to adapt the environment with RCBF controller. @@ -44,7 +44,7 @@ class RobustBarrierFunctionAdapter(OffPolicyAdapter): """ def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: - """Initialize an instance of :class:`BarrierFunctionAdapter`.""" + """Initialize an instance of :class:`RobustBarrierFunctionAdapter`.""" super().__init__(env_id, num_envs, seed, cfgs) self.solver: CBFQPLayer self.dynamics_model: DynamicsModel diff --git a/omnisafe/algorithms/off_policy/sac_rcbf.py b/omnisafe/algorithms/off_policy/sac_rcbf.py index e1a351020..fcb7dad26 100644 --- a/omnisafe/algorithms/off_policy/sac_rcbf.py +++ b/omnisafe/algorithms/off_policy/sac_rcbf.py @@ -53,7 +53,6 @@ def _init_env(self) -> None: env=self._env, device=self._cfgs.train_cfgs.device, gamma_b=self._cfgs.cbf_cfgs.gamma_b, - k_d=self._cfgs.cbf_cfgs.k_d, l_p=self._cfgs.cbf_cfgs.l_p, ) dynamics_model = DynamicsModel(env=self._env) diff --git a/omnisafe/algorithms/on_policy/base/ppo.py b/omnisafe/algorithms/on_policy/base/ppo.py index 463b286c8..69f0ce4e9 100644 --- a/omnisafe/algorithms/on_policy/base/ppo.py 
+++ b/omnisafe/algorithms/on_policy/base/ppo.py @@ -16,6 +16,8 @@ from __future__ import annotations +import torch + from omnisafe.algorithms import registry from omnisafe.algorithms.on_policy.base.policy_gradient import PolicyGradient @@ -29,3 +31,57 @@ class PPO(PolicyGradient): - Authors: John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, Oleg Klimov. - URL: `PPO `_ """ + + def _loss_pi( + self, + obs: torch.Tensor, + act: torch.Tensor, + logp: torch.Tensor, + adv: torch.Tensor, + ) -> torch.Tensor: + r"""Computing pi/actor loss. + + In Proximal Policy Optimization, the loss is defined as: + + .. math:: + + L^{CLIP} = \underset{s_t \sim \rho_{\theta}}{\mathbb{E}} \left[ + \min ( r_t A^{R}_{\pi_{\theta}} (s_t, a_t) , \text{clip} (r_t, 1 - \epsilon, 1 + \epsilon) + A^{R}_{\pi_{\theta}} (s_t, a_t) + \right] + + where :math:`r_t = \frac{\pi_{\theta}^{'} (a_t|s_t)}{\pi_{\theta} (a_t|s_t)}`, + :math:`\epsilon` is the clip parameter, and :math:`A^{R}_{\pi_{\theta}} (s_t, a_t)` is the + advantage. + + Args: + obs (torch.Tensor): The ``observation`` sampled from buffer. + act (torch.Tensor): The ``action`` sampled from buffer. + logp (torch.Tensor): The ``log probability`` of action sampled from buffer. + adv (torch.Tensor): The ``advantage`` processed. ``reward_advantage`` here. + + Returns: + The loss of pi/actor. 
+ """ + distribution = self._actor_critic.actor(obs) + logp_ = self._actor_critic.actor.log_prob(act) + std = self._actor_critic.actor.std + ratio = torch.exp(logp_ - logp) + ratio_cliped = torch.clamp( + ratio, + 1 - self._cfgs.algo_cfgs.clip, + 1 + self._cfgs.algo_cfgs.clip, + ) + loss = -torch.min(ratio * adv, ratio_cliped * adv).mean() + loss -= self._cfgs.algo_cfgs.entropy_coef * distribution.entropy().mean() + # useful extra info + entropy = distribution.entropy().mean().item() + self._logger.store( + { + 'Train/Entropy': entropy, + 'Train/PolicyRatio': ratio, + 'Train/PolicyStd': std, + 'Loss/Loss_pi': loss.mean().item(), + }, + ) + return loss diff --git a/omnisafe/common/barrier_comp.py b/omnisafe/common/barrier_comp.py index 40381ccd3..891932188 100644 --- a/omnisafe/common/barrier_comp.py +++ b/omnisafe/common/barrier_comp.py @@ -85,7 +85,6 @@ def update( Returns: torch.Tensor: The loss after training. """ - # Train the model for _ in range(self._cfgs.update_iters): target = approx_compensating_act + compensating_act self.optimizer.zero_grad() diff --git a/omnisafe/common/barrier_solver.py b/omnisafe/common/barrier_solver.py index e4471fb38..35221281e 100644 --- a/omnisafe/common/barrier_solver.py +++ b/omnisafe/common/barrier_solver.py @@ -52,15 +52,7 @@ def __init__( max_speed: float = 60.0, device: str = 'cpu', ) -> None: - """Initializes the PendulumSolver with specified parameters. - - Args: - action_size (int): Size of the action space. - observation_size (int): Size of the observation space. - torque_bound (float): Maximum torque bound. - max_speed (float): Maximum speed of the pendulum. - device (str): Device to run the computations on. 
- """ + """Initialize the PendulumSolver with specified parameters.""" self.action_size = action_size self.observation_size = observation_size self.torque_bound = torque_bound @@ -77,7 +69,7 @@ def __init__( warnings.filterwarnings('ignore') def build_gp_model(self, save_dir: str | None = None) -> None: - """Builds the Gaussian Process model.""" + """Build the Gaussian Process model.""" gp_list = [] noise = 0.01 for _ in range(self.observation_size - 1): @@ -96,7 +88,7 @@ def gp_models(self) -> list[GaussianProcessRegressor]: return self.gp_model def _build_barrier(self) -> None: - """Builds the barrier for the pendulum solver.""" + """Build the barrier for the pendulum solver.""" self.P = matrix(np.diag([1.0, 1e16]), tc='d') self.q = matrix(np.zeros(self.action_size + 1)) self.h1 = np.array([1, 0.01]) @@ -112,7 +104,7 @@ def control_barrier( # pylint: disable=invalid-name x: np.ndarray, std: np.ndarray, ) -> torch.Tensor: - """Adjusts the original action using a control barrier function. + """Adjust the original action using a control barrier function. Args: original_action (torch.Tensor): The original action proposed by the RL algorithm. @@ -124,12 +116,12 @@ def control_barrier( # pylint: disable=invalid-name Returns: torch.Tensor: The adjusted action that respects the system's constraints. 
""" - # Define gamma for the barrier function + # define gamma for the barrier function gamma_b = 0.5 kd = 1.5 u_rl = original_action.cpu().detach().numpy() - # Set up Quadratic Program to satisfy Control Barrier Function + # set up Quadratic Program to satisfy Control Barrier Function G = np.array( [ [ @@ -185,14 +177,14 @@ def control_barrier( # pylint: disable=invalid-name ) h = np.squeeze(h).astype(np.double) - # Convert numpy arrays to cvx matrices to set up QP + # convert numpy arrays to cvx matrices to set up QP G = matrix(G, tc='d') h = matrix(h, tc='d') solvers.options['show_progress'] = False sol = solvers.qp(self.P, self.q, G, h) u_bar = sol['x'] - # Check if the adjusted action is within bounds + # check if the adjusted action is within bounds if np.add(np.squeeze(u_rl), np.squeeze(u_bar[0])) - 0.001 >= self.torque_bound: u_bar[0] = self.torque_bound - u_rl print('Error in QP') @@ -204,7 +196,7 @@ def control_barrier( # pylint: disable=invalid-name # pylint: disable-next=attribute-defined-outside-init,import-outside-toplevel,invalid-name def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: - """Calculates the dynamics of the system. + """Calculate the dynamics of the system. Args: obs (list[float]): The current observation of the system state. @@ -213,15 +205,21 @@ def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: Returns: np.ndarray: The calculated dynamics of the system. 
""" - dt = 0.05 # Time step - G = 10 # Gravitational constant - m = 2 # Mass - length = 2 # Length + # time step + dt = 0.05 + # gravitational constant + G = 10 + # mass + m = 2 + # length + length = 2 - theta = np.arctan2(obs[1], obs[0]) # Calculate the angle - theta_dot = obs[2] # Angular velocity + # calculate the angle + theta = np.arctan2(obs[1], obs[0]) + # angular velocity + theta_dot = obs[2] - # Dynamics equations + # dynamics equations f = np.array( [ -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2 @@ -237,7 +235,7 @@ def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: return np.squeeze(f) def update_gp_dynamics(self, obs: np.ndarray, act: np.ndarray) -> None: - """Updates the Gaussian Process (GP) dynamics model based on observed states and actions. + """Update the Gaussian Process (GP) dynamics model based on observed states and actions. Args: obs (np.ndarray): Observed states. @@ -263,7 +261,7 @@ def update_gp_dynamics(self, obs: np.ndarray, act: np.ndarray) -> None: self.gp_model[1].fit(S, err[:, 1]) def get_gp_dynamics(self, obs: torch.Tensor, use_prev_model: bool) -> list[np.ndarray]: - """Retrieves the gp dynamics based on the current observation. + """Retrieve the GP dynamics based on the current observation. Args: obs (torch.Tensor): Current state observation. @@ -311,6 +309,6 @@ def get_gp_dynamics(self, obs: torch.Tensor, use_prev_model: bool) -> list[np.nd ] def reset_gp_model(self) -> None: - """Reset the gaussian processing model of barrier function solver.""" + """Reset the gaussian process model of barrier function solver.""" self.gp_model_prev = self.gp_model.copy() self.build_gp_model() diff --git a/omnisafe/common/robust_barrier_solver.py b/omnisafe/common/robust_barrier_solver.py index 62499352b..348a1a4c7 100644 --- a/omnisafe/common/robust_barrier_solver.py +++ b/omnisafe/common/robust_barrier_solver.py @@ -36,11 +36,19 @@ class CBFQPLayer: """CBFQLayer for robust control barrier function solver. 
Args: - env (gym.Env): The Gym environment to interact with. + env (gymnasium.Env): The Gymnasium environment to interact with. device (str, optional): The device type, such as 'cpu' or 'gpu'. Defaults to 'cpu'. gamma_b (float, optional): The gamma parameter. Defaults to 20. - k_d (float, optional): The confidence parameter desired. Defaults to 3.0. l_p (float, optional): Some additional layer parameter, purpose unspecified. Defaults to 0.03. + + Attributes: + device (torch.device): The device on which computations will be performed. + env (gym.Env): The Gym environment instance. + u_min (float): The minimum control bound. + u_max (float): The maximum control bound. + gamma_b (float): The gamma parameter. + l_p (float): An additional layer parameter. + action_dim (int): The dimensionality of the action space. """ def __init__( @@ -48,7 +56,6 @@ def __init__( env: gym.Env, device: str = 'cpu', gamma_b: float = 20, - k_d: float = 3.0, l_p: float = 0.03, ) -> None: """Initializes a CBFLayer instance with specified parameters and environment.""" @@ -56,7 +63,6 @@ def __init__( self.env = env self.u_min, self.u_max = self.get_control_bounds() self.gamma_b = gamma_b - self.k_d = k_d self.l_p = l_p self.action_dim = env.action_space.shape[0] warnings.filterwarnings('ignore') @@ -79,7 +85,6 @@ def get_safe_action( Returns: torch.Tensor: Safe actions adjusted for given constraints and uncertainties. 
""" - # Batch form adjustment if only a single data point is passed expand_dims = len(state_batch.shape) == 1 if expand_dims: state_batch = state_batch.unsqueeze(0) @@ -227,7 +232,6 @@ def get_cbf_qp_constraints( batch_size = state_batch.shape[0] gamma_b = self.gamma_b - # Expand dims state_batch = torch.unsqueeze(state_batch, -1).to(self.device) action_batch = torch.unsqueeze(action_batch, -1).to(self.device) mean_pred_batch = torch.unsqueeze(mean_pred_batch, -1).to(self.device) @@ -261,7 +265,6 @@ def get_cbf_qp_constraints( sigma_theta_aug[:, 1, :] = sigma_pred_batch[:, 2, :] sigma_ps = torch.bmm(torch.abs(g_ps), sigma_theta_aug) + sigma_pred_batch[:, :2, :] - # Build RCBFs hs = 1e3 * torch.ones((batch_size, num_cbfs), device=self.device) dhdps = torch.zeros((batch_size, num_cbfs, 2), device=self.device) hazards = self.env.hazards diff --git a/omnisafe/common/robust_gp_model.py b/omnisafe/common/robust_gp_model.py index 62679fd59..5a305140d 100644 --- a/omnisafe/common/robust_gp_model.py +++ b/omnisafe/common/robust_gp_model.py @@ -134,11 +134,11 @@ def __init__( warnings.filterwarnings('ignore') def train(self, training_iter: int) -> None: - """Trains the Gaussian Process model. + """Train the Gaussian Process model. Args: training_iter (int): Number of training iterations. - verbose (bool): If True, prints detailed logging information. + verbose (bool): If True, print detailed logging information. """ self.model.train() self.likelihood.train() @@ -153,7 +153,7 @@ def train(self, training_iter: int) -> None: optimizer.step() def predict(self, test_x: torch.Tensor) -> dict[str, torch.Tensor | np.ndarray]: - """Makes predictions on new data. + """Make predictions on new data. Args: test_x (torch.Tensor): Test data features. If not a tensor, it will be converted. 
@@ -188,13 +188,13 @@ def predict(self, test_x: torch.Tensor) -> dict[str, torch.Tensor | np.ndarray]: # pylint: disable-next=too-many-instance-attributes class DynamicsModel: - """Initializes the DynamicsModel with a gym environment. + """Initialize the DynamicsModel with a gymnasium environment. Args: - env (gym.Env): The gym environment to model dynamics for. - gp_model_size (int, optional): Maximum history count for disturbances. Defaults to 2000. - l_p (float, optional): Learning parameter. Defaults to 0.03. - device (str, optional): The device to perform computations on. Defaults to 'cpu'. + env (gym.Env): The gymnasium environment to model dynamics for. + gp_model_size (int, optional): Maximum history count for disturbances. Default to 2000. + l_p (float, optional): Learning parameter. Default to 0.03. + device (str, optional): The device to perform computations on. Default to 'cpu'. """ def __init__( @@ -204,7 +204,7 @@ def __init__( l_p: float = 0.03, device: str = 'cpu', ) -> None: - """Initializes the DynamicsModel with a gym environment.""" + """Initialize the DynamicsModel with a gymnasium environment.""" self.env = env self.get_f, self.get_g = self.get_dynamics() self.n_s = DYNAMICS_MODE[self.env.dynamics_mode]['n_s'] @@ -233,7 +233,7 @@ def __init__( self.l_p = l_p def get_dynamics(self) -> tuple[Callable, Callable]: - """Retrieves the dynamics functions for drift and control based on the environment's dynamics mode. + """Retrieve the dynamics functions for drift and control based on the environment's dynamics mode. Returns: tuple: A tuple containing two callable methods, `get_f` and `get_g`. @@ -257,7 +257,7 @@ def get_g(state_batch: np.ndarray) -> np.ndarray: return get_f, get_g def get_state(self, obs: torch.Tensor) -> torch.Tensor: - """Processes the raw observations from the environment. + """Process the raw observations from the environment. Args: obs (torch.Tensor): The environment observations. 
@@ -293,7 +293,7 @@ def append_transition( u_batch: np.ndarray, next_state_batch: np.ndarray, ) -> None: - """Estimates the disturbance from the current dynamics transition and adds it to the buffer. + """Estimate the disturbance from the current dynamics transition and adds it to the buffer. Args: state_batch (np.ndarray): The batch of current states, shape (n_s,) or (batch_size, n_s). @@ -321,7 +321,7 @@ def append_transition( self.fit_gp_model() def fit_gp_model(self, training_iter: int = 70) -> None: - """Fits a Gaussian Process model to the disturbance data. + """Fit a Gaussian Process model to the disturbance data. Args: training_iter (int, optional): Number of training iterations for the GP model. Defaults to 70. @@ -354,7 +354,7 @@ def fit_gp_model(self, training_iter: int = 70) -> None: self._train_y = train_y def predict_disturbance(self, test_x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: - """Predicts the disturbance at the queried states using the trained Gaussian Process models. + """Predict the disturbance at the queried states using the trained Gaussian Process models. Args: test_x (torch.Tensor): The state for which to predict disturbances, shape (n_test, n_s). @@ -394,7 +394,7 @@ def predict_disturbance(self, test_x: torch.Tensor) -> tuple[torch.Tensor, torch return (to_tensor(means, dtype, device), to_tensor(f_std, dtype, device)) def load_disturbance_models(self, save_dir: str, epoch: str) -> None: - """Loads the disturbance models and their training data. + """Load the disturbance models and their training data. Args: save_dir (str): The directory where the model files are saved. 
@@ -420,15 +420,15 @@ def load_disturbance_models(self, save_dir: str, epoch: str) -> None: @property def train_x(self) -> np.ndarray: - """Returns the training data input features used for the disturbance estimators.""" + """Return the training data input features used for the disturbance estimators.""" return self._train_x @property def train_y(self) -> np.ndarray: - """Returns the training data labels used for the disturbance estimators.""" + """Return the training data labels used for the disturbance estimators.""" return self._train_y @property def disturb_estimators(self) -> list[GPyDisturbanceEstimator]: - """Provides access to the list of trained disturbance estimator models.""" + """Provide access to the list of trained disturbance estimator models.""" return self._disturb_estimators diff --git a/omnisafe/configs/off-policy/DDPGCBF.yaml b/omnisafe/configs/off-policy/DDPGCBF.yaml index 3eec4dced..f9d706305 100644 --- a/omnisafe/configs/off-policy/DDPGCBF.yaml +++ b/omnisafe/configs/off-policy/DDPGCBF.yaml @@ -105,7 +105,6 @@ defaults: # Size of hidden layers hidden_sizes: [400, 300] # Activation function - activation: relu # The learning rate of Critic network lr: 0.001 diff --git a/omnisafe/configs/off-policy/SACRCBF.yaml b/omnisafe/configs/off-policy/SACRCBF.yaml index 53c5e5a17..b4d182c50 100644 --- a/omnisafe/configs/off-policy/SACRCBF.yaml +++ b/omnisafe/configs/off-policy/SACRCBF.yaml @@ -38,9 +38,9 @@ defaults: update_cycle: 1 # number of iterations to update the policy update_iters: 1 - # The size of replay buffer + # size of replay buffer size: 1000000 - # The size of batch + # size of batch batch_size: 256 # normalize reward reward_normalize: False @@ -54,23 +54,23 @@ defaults: use_critic_norm: False # critic norm coefficient critic_norm_coeff: 0.001 - # The soft update coefficient + # soft update coefficient polyak: 0.005 - # The discount factor of GAE + # discount factor of GAE gamma: 0.99 - # Actor perdorm random action before 
`start_learning_steps` steps + # actor perform random action before `start_learning_steps` steps start_learning_steps: 5000 - # The delay step of policy update + # delay step of policy update policy_delay: 1 - # Whether to use the exploration noise + # whether to use the exploration noise use_exploration_noise: False - # The exploration noise + # exploration noise exploration_noise: 0.1 - # The policy noise + # policy noise policy_noise: 0.2 # policy_noise_clip policy_noise_clip: 0.5 - # The value of alpha + # value of alpha alpha: 0.2 # Whether to use auto alpha auto_alpha: True @@ -106,29 +106,29 @@ defaults: actor_type: gaussian_sac # linear learning rate decay linear_lr_decay: False - # Configuration of Actor network + # configuration of actor network actor: - # Size of hidden layers + # size of hidden layers hidden_sizes: [400, 300] - # Activation function + # activation function activation: relu - # The learning rate of Actor network + # learning rate of actor network lr: 0.0003 - # Configuration of Critic network + # configuration of critic network critic: - # The number of critic networks + # number of critic networks num_critics: 2 - # Size of hidden layers + # size of hidden layers hidden_sizes: [400, 300] - # Activation function + # activation function activation: relu - # The learning rate of Critic network + # learning rate of critic network lr: 0.0003 - # Dynamics model configurations + # dynamics model configurations dynamics_model_cfgs: - # The max number of episodes updating GP models + # max number of episodes updating GP models gp_max_episodes: 100 - # The size of gp model + # size of gp model gp_model_size: 2000 - # Whether to use the action compensator + # whether to use the action compensator use_compensator: False diff --git a/omnisafe/configs/on-policy/IPO.yaml b/omnisafe/configs/on-policy/IPO.yaml index e2a6869c3..807984252 100644 --- a/omnisafe/configs/on-policy/IPO.yaml +++ b/omnisafe/configs/on-policy/IPO.yaml @@ -27,17 +27,17 @@ 
defaults: # number of parallel agent, similar to a3c parallel: 1 # total number of steps to train - total_steps: 80_000 + total_steps: 10000000 # algorithm configurations algo_cfgs: # number of steps to update the policy - steps_per_epoch: 2000 + steps_per_epoch: 20000 # number of iterations to update the policy - update_iters: 10 + update_iters: 40 # batch size for each iteration - batch_size: 256 + batch_size: 64 # target kl divergence - target_kl: 0.005 + target_kl: 0.02 # entropy coefficient entropy_coef: 0.0 # normalize reward @@ -45,7 +45,7 @@ defaults: # normalize cost cost_normalize: False # normalize observation - obs_normalize: False + obs_normalize: True # early stop when kl divergence is bigger than target kl kl_early_stop: True # use max gradient norm @@ -57,11 +57,11 @@ defaults: # critic norm coefficient critic_norm_coef: 0.001 # reward discount factor - gamma: 0.995 + gamma: 0.99 # cost discount factor cost_gamma: 0.99 # lambda for gae - lam: 0.98 + lam: 0.95 # lambda for cost gae lam_c: 0.95 # clip ratio @@ -127,10 +127,36 @@ defaults: # lagrangian configurations lagrange_cfgs: # Tolerance of constraint violation - cost_limit: 1000.0 + cost_limit: 25.0 # Initial value of lagrangian multiplier lagrangian_multiplier_init: 0.001 # Learning rate of lagrangian multiplier lambda_lr: 0.035 # Type of lagrangian optimizer lambda_optimizer: "Adam" + +Pendulum-v1: + # training configurations + train_cfgs: + # total number of steps to train + total_steps: 80_000 + # algorithm configurations + algo_cfgs: + # number of steps to update the policy + steps_per_epoch: 2000 + # number of iterations to update the policy + update_iters: 10 + # batch size for each iteration + batch_size: 256 + # target kl divergence + target_kl: 0.005 + # normalize observation + obs_normalize: False + # reward discount factor + gamma: 0.995 + # lambda for gae + lam: 0.98 + # lagrangian configurations + lagrange_cfgs: + # Tolerance of constraint violation + cost_limit: 1000.0 diff --git 
a/omnisafe/configs/on-policy/TRPOCBF.yaml b/omnisafe/configs/on-policy/TRPOCBF.yaml index 9d1b67ec0..2b0b16126 100644 --- a/omnisafe/configs/on-policy/TRPOCBF.yaml +++ b/omnisafe/configs/on-policy/TRPOCBF.yaml @@ -116,7 +116,6 @@ defaults: hidden_sizes: [64, 64] # activation function activation: relu - # out_activation: tanh # learning rate lr: ~ # critic network configurations diff --git a/omnisafe/envs/classic_control/envs_from_rcbf.py b/omnisafe/envs/classic_control/envs_from_rcbf.py index 33e13189c..211c8a352 100644 --- a/omnisafe/envs/classic_control/envs_from_rcbf.py +++ b/omnisafe/envs/classic_control/envs_from_rcbf.py @@ -43,7 +43,6 @@ def __init__(self) -> None: self.max_episode_steps = 1000 self.reward_goal = 1.0 self.goal_size = 0.3 - # Initialize Env self.state = None self.episode_step = 0 self.initial_state = np.array( @@ -54,13 +53,9 @@ def __init__(self) -> None: self.reset() - # Get Dynamics self.get_f, self.get_g = self._get_dynamics() - # Disturbance self.disturb_mean = np.zeros((3,)) self.disturb_covar = np.diag([0.005, 0.005, 0.05]) * 20 - - # Build Hazards self.hazards = [] self.hazards.append( @@ -78,8 +73,6 @@ def __init__(self) -> None: self.hazards.append( {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([1.0, 1.0])}, ) - - # Viewer self.viewer = None def step( @@ -145,7 +138,7 @@ def get_obs(self) -> np.ndarray: """ rel_loc = self.goal_pos - self.state[:2] goal_dist = np.linalg.norm(rel_loc) - goal_compass = self.obs_compass() # compass to the goal + goal_compass = self.obs_compass() return np.array( [ diff --git a/omnisafe/envs/rcbf_env.py b/omnisafe/envs/rcbf_env.py index a8fbdcd28..983528489 100644 --- a/omnisafe/envs/rcbf_env.py +++ b/omnisafe/envs/rcbf_env.py @@ -31,7 +31,7 @@ @env_register class RobustBarrierFunctionEnv(CMDP): - """Interface of control barrier function-based environments. + """Interface of robust control barrier function-based environments. .. 
warning:: Since environments based on control barrier functions require special judgment and control @@ -56,21 +56,7 @@ def __init__( device: str = 'cpu', **kwargs: Any, ) -> None: - """Initialize the environment. - - Args: - env_id (str): Environment id. - num_envs (int, optional): Number of environments. Defaults to 1. - device (torch.device, optional): Device to store the data. Defaults to 'cpu'. - - Keyword Args: - render_mode (str, optional): The render mode, ranging from ``human``, ``rgb_array``, ``rgb_array_list``. - Defaults to ``rgb_array``. - camera_name (str, optional): The camera name. - camera_id (int, optional): The camera id. - width (int, optional): The width of the rendered image. Defaults to 256. - height (int, optional): The height of the rendered image. Defaults to 256. - """ + """Initialize the robust control barrier function-based environments.""" super().__init__(env_id) self._env_id = env_id if num_envs == 1: diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py index 90535d931..691d6aa86 100644 --- a/omnisafe/evaluator.py +++ b/omnisafe/evaluator.py @@ -330,7 +330,6 @@ def __load_model_and_env( env=self._env, device=self._cfgs['train_cfgs']['device'], gamma_b=self._cfgs['cbf_cfgs']['gamma_b'], - k_d=self._cfgs['cbf_cfgs']['k_d'], l_p=self._cfgs['cbf_cfgs']['l_p'], ) self._dynamics_model = DynamicsModel(env=self._env) diff --git a/omnisafe/utils/tools.py b/omnisafe/utils/tools.py index 7c7a10ceb..d5be5369d 100644 --- a/omnisafe/utils/tools.py +++ b/omnisafe/utils/tools.py @@ -367,13 +367,13 @@ def to_tensor( """Convert a numpy array to a torch tensor of specified type and device. Args: - x (np.ndarray): A numpy array to be converted. - dtype (torch.dtype): The desired data type for the tensor. - device (torch.device): The device to store the tensor on. - requires_grad (bool): If True, gradients will be computed for operations involving this tensor. + x (np.ndarray): A numpy array to be converted. 
+ dtype (torch.dtype): The desired data type for the tensor. + device (torch.device): The device to store the tensor on. + requires_grad (bool): If True, gradients will be computed for operations involving this tensor. Returns: - torch.Tensor: A torch tensor representation of the input array. + torch.Tensor: A torch tensor representation of the input array. """ return torch.from_numpy(x).type(dtype).to(device).requires_grad_(requires_grad) @@ -382,10 +382,10 @@ def sort_vertices_cclockwise(vertices: np.ndarray) -> np.ndarray: """Sort vertices of a 2D convex polygon in counter-clockwise direction. Args: - vertices (np.ndarray): An array of shape (n_v, 2) where n_v is the number of vertices. + vertices (np.ndarray): An array of shape (n_v, 2) where n_v is the number of vertices. Returns: - np.ndarray: An array of vertices sorted in counter-clockwise direction. + np.ndarray: An array of vertices sorted in counter-clockwise direction. """ assert vertices.shape[1] == 2, f'Vertices must each have dimension 2, got {vertices.shape[1]}' polygon_center = vertices.sum(axis=0, keepdims=True) / vertices.shape[0] # (1, d) From 483e42750d4eade310c5b07ec9fcfe16fc2084b7 Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Tue, 14 May 2024 22:51:15 +0800 Subject: [PATCH 09/18] style: fix comment --- omnisafe/adapter/__init__.py | 3 +++ omnisafe/algorithms/off_policy/ddpg.py | 12 ++++++---- omnisafe/algorithms/off_policy/ddpg_cbf.py | 2 +- omnisafe/algorithms/off_policy/sac_rcbf.py | 2 +- .../on_policy/barrier_function/ppo_cbf.py | 23 ++----------------- .../on_policy/base/policy_gradient.py | 12 ++++++---- omnisafe/configs/off-policy/SACRCBF.yaml | 2 +- omnisafe/configs/on-policy/PPOBetaCBF.yaml | 2 +- omnisafe/configs/on-policy/TRPOCBF.yaml | 2 +- 9 files changed, 26 insertions(+), 34 deletions(-) diff --git a/omnisafe/adapter/__init__.py b/omnisafe/adapter/__init__.py index 02dab6709..873eccc33 100644 --- a/omnisafe/adapter/__init__.py +++ b/omnisafe/adapter/__init__.py 
@@ -14,12 +14,15 @@ # ============================================================================== """Adapter for the environment and the algorithm.""" +from omnisafe.adapter.barrier_function_adapter import BarrierFunctionAdapter from omnisafe.adapter.beta_barrier_function_adapter import BetaBarrierFunctionAdapter from omnisafe.adapter.early_terminated_adapter import EarlyTerminatedAdapter from omnisafe.adapter.modelbased_adapter import ModelBasedAdapter from omnisafe.adapter.offline_adapter import OfflineAdapter from omnisafe.adapter.offpolicy_adapter import OffPolicyAdapter +from omnisafe.adapter.offpolicy_barrier_function_adapter import OffPolicyBarrierFunctionAdapter from omnisafe.adapter.online_adapter import OnlineAdapter from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter +from omnisafe.adapter.robust_barrier_function_adapter import RobustBarrierFunctionAdapter from omnisafe.adapter.saute_adapter import SauteAdapter from omnisafe.adapter.simmer_adapter import SimmerAdapter diff --git a/omnisafe/algorithms/off_policy/ddpg.py b/omnisafe/algorithms/off_policy/ddpg.py index 0ce31f286..0d698e5f2 100644 --- a/omnisafe/algorithms/off_policy/ddpg.py +++ b/omnisafe/algorithms/off_policy/ddpg.py @@ -188,7 +188,7 @@ def _init_log(self) -> None: config=self._cfgs, ) - self._log_what_to_save() + self._setup_torch_saver() self._logger.torch_save() self._specific_save() @@ -559,8 +559,12 @@ def _log_when_not_update(self) -> None: }, ) - def _log_what_to_save(self) -> None: - """Define what need to be saved below.""" + def _setup_torch_saver(self) -> None: + """Define what need to be saved below. + + OmniSafe's main storage interface is based on PyTorch. If you need to save models in other + formats, please use :meth:`_specific_save`. 
+ """ what_to_save: dict[str, Any] = {} what_to_save['pi'] = self._actor_critic.actor @@ -571,4 +575,4 @@ def _log_what_to_save(self) -> None: self._logger.setup_torch_saver(what_to_save) def _specific_save(self) -> None: - """Save some algorithms specific models per epoch.""" + """Save some algorithms specific models other than PyTorch format per epoch.""" diff --git a/omnisafe/algorithms/off_policy/ddpg_cbf.py b/omnisafe/algorithms/off_policy/ddpg_cbf.py index de556372b..17dcacda3 100644 --- a/omnisafe/algorithms/off_policy/ddpg_cbf.py +++ b/omnisafe/algorithms/off_policy/ddpg_cbf.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/omnisafe/algorithms/off_policy/sac_rcbf.py b/omnisafe/algorithms/off_policy/sac_rcbf.py index fcb7dad26..e6c020770 100644 --- a/omnisafe/algorithms/off_policy/sac_rcbf.py +++ b/omnisafe/algorithms/off_policy/sac_rcbf.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py b/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py index 1b46857e6..4ab2f4d17 100644 --- a/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py +++ b/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py @@ -60,27 +60,8 @@ def _loss_pi( ) -> torch.Tensor: r"""Computing pi/actor loss. - In Proximal Policy Optimization, the loss is defined as: - - .. 
math:: - - L^{CLIP} = \underset{s_t \sim \rho_{\theta}}{\mathbb{E}} \left[ - \min ( r_t A^{R}_{\pi_{\theta}} (s_t, a_t) , \text{clip} (r_t, 1 - \epsilon, 1 + \epsilon) - A^{R}_{\pi_{\theta}} (s_t, a_t) - \right] - - where :math:`r_t = \frac{\pi_{\theta}^{'} (a_t|s_t)}{\pi_{\theta} (a_t|s_t)}`, - :math:`\epsilon` is the clip parameter, and :math:`A^{R}_{\pi_{\theta}} (s_t, a_t)` is the - advantage. - - Args: - obs (torch.Tensor): The ``observation`` sampled from buffer. - act (torch.Tensor): The ``action`` sampled from buffer. - logp (torch.Tensor): The ``log probability`` of action sampled from buffer. - adv (torch.Tensor): The ``advantage`` processed. ``reward_advantage`` here. - - Returns: - The loss of pi/actor. + This section of the logic is consistent with PPO, except that it does not record the + standard deviation of the actor distribution. """ distribution = self._actor_critic.actor(obs) logp_ = self._actor_critic.actor.log_prob(act) diff --git a/omnisafe/algorithms/on_policy/base/policy_gradient.py b/omnisafe/algorithms/on_policy/base/policy_gradient.py index 826ff7c1a..cb144922a 100644 --- a/omnisafe/algorithms/on_policy/base/policy_gradient.py +++ b/omnisafe/algorithms/on_policy/base/policy_gradient.py @@ -180,7 +180,7 @@ def _init_log(self) -> None: config=self._cfgs, ) - self._log_what_to_save() + self._setup_torch_saver() self._logger.torch_save() self._specific_save() @@ -584,8 +584,12 @@ def _loss_pi( ) return loss - def _log_what_to_save(self) -> None: - """Define what need to be saved below.""" + def _setup_torch_saver(self) -> None: + """Define what need to be saved below. + + OmniSafe's main storage interface is based on PyTorch. If you need to save models in other + formats, please use :meth:`_specific_save`. 
+ """ what_to_save: dict[str, Any] = {} what_to_save['pi'] = self._actor_critic.actor @@ -596,4 +600,4 @@ def _log_what_to_save(self) -> None: self._logger.setup_torch_saver(what_to_save) def _specific_save(self) -> None: - """Save some algorithms specific models per epoch.""" + """Save some algorithms specific models other than PyTorch format per epoch.""" diff --git a/omnisafe/configs/off-policy/SACRCBF.yaml b/omnisafe/configs/off-policy/SACRCBF.yaml index b4d182c50..f70327e6d 100644 --- a/omnisafe/configs/off-policy/SACRCBF.yaml +++ b/omnisafe/configs/off-policy/SACRCBF.yaml @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/omnisafe/configs/on-policy/PPOBetaCBF.yaml b/omnisafe/configs/on-policy/PPOBetaCBF.yaml index 4bd5f0f12..afb636e8b 100644 --- a/omnisafe/configs/on-policy/PPOBetaCBF.yaml +++ b/omnisafe/configs/on-policy/PPOBetaCBF.yaml @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/omnisafe/configs/on-policy/TRPOCBF.yaml b/omnisafe/configs/on-policy/TRPOCBF.yaml index 2b0b16126..c61d3df44 100644 --- a/omnisafe/configs/on-policy/TRPOCBF.yaml +++ b/omnisafe/configs/on-policy/TRPOCBF.yaml @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From dd9068f6174cdd4cd142ac3e96d40966e39641c9 Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Thu, 16 May 2024 15:17:56 +0800 Subject: [PATCH 10/18] style: fix pre-commit --- .pre-commit-config.yaml | 2 +- omnisafe/adapter/modelbased_adapter.py | 4 ++-- omnisafe/common/robust_barrier_solver.py | 3 ++- omnisafe/envs/safety_gymnasium_modelbased.py | 6 +++++- omnisafe/utils/plotter.py | 3 +-- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 96e584f57..51cbb81c1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: - id: debug-statements - id: double-quote-string-fixer - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.4.2 + rev: v0.4.4 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] diff --git a/omnisafe/adapter/modelbased_adapter.py b/omnisafe/adapter/modelbased_adapter.py index 8abbd90d7..6e2154531 100644 --- a/omnisafe/adapter/modelbased_adapter.py +++ b/omnisafe/adapter/modelbased_adapter.py @@ -269,8 +269,8 @@ def rollout( # pylint: disable=too-many-arguments,too-many-locals update_actor_critic_time = 0.0 update_dynamics_time = 0.0 - if use_eval: - eval_time = 0.0 + + eval_time = 0.0 epoch_steps = 0 diff --git a/omnisafe/common/robust_barrier_solver.py b/omnisafe/common/robust_barrier_solver.py index 348a1a4c7..a871ccc4d 100644 --- a/omnisafe/common/robust_barrier_solver.py +++ b/omnisafe/common/robust_barrier_solver.py @@ -237,7 +237,6 @@ def get_cbf_qp_constraints( mean_pred_batch = torch.unsqueeze(mean_pred_batch, -1).to(self.device) sigma_pred_batch = torch.unsqueeze(sigma_pred_batch, -1).to(self.device) if self.env.dynamics_mode == 'Unicycle': - num_cbfs = len(self.env.hazards) l_p = self.l_p buffer = 0.1 @@ -299,6 +298,8 @@ def get_cbf_qp_constraints( .to(self.device) ) q = torch.zeros((batch_size, n_u + 1)).to(self.device) + else: + raise NotImplementedError n_u = action_batch.shape[1] diff --git 
a/omnisafe/envs/safety_gymnasium_modelbased.py b/omnisafe/envs/safety_gymnasium_modelbased.py index fe5ae5071..2e1a00598 100644 --- a/omnisafe/envs/safety_gymnasium_modelbased.py +++ b/omnisafe/envs/safety_gymnasium_modelbased.py @@ -181,6 +181,8 @@ def get_cost_from_obs_tensor(self, obs: torch.Tensor, is_binary: bool = True) -> elif len(obs.shape) == 3: batch_size = obs.shape[0] * obs.shape[1] hazard_obs = obs[:, :, hazards_key].reshape(batch_size, -1, 2) + else: + raise NotImplementedError hazards_dist = torch.sqrt(torch.sum(torch.square(hazard_obs), dim=2)).reshape( batch_size, -1, @@ -497,8 +499,10 @@ def reset( self.get_lidar_from_coordinate(flat_coordinate_obs) info['obs_original'] = obs_original info['goal_met'] = False - obs = torch.as_tensor(flat_coordinate_obs, dtype=torch.float32, device=self._device) + else: + obs = torch.as_tensor(obs_original, dtype=torch.float32, device=self._device) + return obs, info def set_seed(self, seed: int) -> None: diff --git a/omnisafe/utils/plotter.py b/omnisafe/utils/plotter.py index 5bdbb7ec2..f24a97bb4 100644 --- a/omnisafe/utils/plotter.py +++ b/omnisafe/utils/plotter.py @@ -118,8 +118,7 @@ def plot_data( smoothed_x = np.convolve(x, y, 'same') / np.convolve(z, y, 'same') datum['Costs'] = smoothed_x - if isinstance(data, list): - data_to_plot = pd.concat(data, ignore_index=True) + data_to_plot = pd.concat(data, ignore_index=True) sns.lineplot( data=data_to_plot, x=xaxis, From d18b1ac2991851c9b9be89eea4d0afc1e2e5d26d Mon Sep 17 00:00:00 2001 From: Gaiejj Date: Wed, 19 Jun 2024 20:13:27 +0800 Subject: [PATCH 11/18] feat: support customized plot --- examples/plot.py | 25 ++++++++++- omnisafe/common/experiment_grid.py | 8 +++- omnisafe/common/statistics_tools.py | 9 +++- omnisafe/utils/plotter.py | 64 +++++++++++++++++++++-------- 4 files changed, 87 insertions(+), 19 deletions(-) diff --git a/examples/plot.py b/examples/plot.py index c16974cce..a425587a7 100644 --- a/examples/plot.py +++ b/examples/plot.py @@ -1,4 +1,4 @@ 
-# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -35,6 +35,27 @@ parser.add_argument('--select', nargs='*') parser.add_argument('--exclude', nargs='*') parser.add_argument('--estimator', default='mean') + parser.add_argument( + '--reward-metrics', + type=str, + choices=[ + 'Metrics/TestEpRet', + 'Metrics/EpRet', + ], + default='Metrics/EpRet', + help='Specify the reward metric to be used.', + ) + parser.add_argument( + '--cost-metrics', + type=str, + choices=[ + 'Metrics/Max_angle_violation', + 'Metrics/TestEpCost', + 'Metrics/EpCost', + ], + default='Metrics/EpCost', + help='Specify the cost metric to be used.', + ) args = parser.parse_args() plotter = Plotter() @@ -48,4 +69,6 @@ select=args.select, exclude=args.exclude, estimator=args.estimator, + cost_metrics=args.cost_metrics, + reward_metrics=args.reward_metrics, ) diff --git a/omnisafe/common/experiment_grid.py b/omnisafe/common/experiment_grid.py index f93cef8d3..c47e5312a 100644 --- a/omnisafe/common/experiment_grid.py +++ b/omnisafe/common/experiment_grid.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -544,6 +544,8 @@ def analyze( compare_num: int | None = None, cost_limit: float | None = None, show_image: bool = False, + reward_metrics: str = 'Metrics/EpRet', + cost_metrics: str = 'Metrics/EpCost', ) -> None: """Analyze the experiment results. @@ -559,6 +561,8 @@ def analyze( cost_limit (float or None, optional): Value for one line showed on graph to indicate cost. Defaults to None. show_image (bool): Whether to show graph image in GUI windows. 
+ reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'. + cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. """ assert self._statistical_tools is not None, 'Please run run() first!' self._statistical_tools.load_source(self.log_dir) @@ -568,6 +572,8 @@ compare_num, cost_limit, show_image=show_image, + reward_metrics=reward_metrics, + cost_metrics=cost_metrics, ) def evaluate(self, num_episodes: int = 10, cost_criteria: float = 1.0) -> None: diff --git a/omnisafe/common/statistics_tools.py b/omnisafe/common/statistics_tools.py index 3856b81a7..d2082918f 100644 --- a/omnisafe/common/statistics_tools.py +++ b/omnisafe/common/statistics_tools.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -83,6 +83,7 @@ def load_source(self, path: str) -> None: 'The config file is not found in the save directory.', ) from error + # pylint: disable-next=too-many-arguments, too-many-locals def draw_graph( self, parameter: str, @@ -91,6 +92,8 @@ def draw_graph( cost_limit: float | None = None, smooth: int = 1, show_image: bool = False, + reward_metrics: str = 'Metrics/EpRet', + cost_metrics: str = 'Metrics/EpCost', ) -> None: """Draw graph. @@ -102,6 +105,8 @@ def draw_graph( cost_limit (float or None, optional): The cost limit of the experiment. Defaults to None. smooth (int, optional): The smooth window size. Defaults to 1. show_image (bool): Whether to show graph image in GUI windows. + reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'. + cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. .. note:: `values` and `compare_num` cannot be set at the same time. 
@@ -161,6 +166,8 @@ def draw_graph( 'mean', save_name=save_name, show_image=show_image, + reward_metrics=reward_metrics, + cost_metrics=cost_metrics, ) except Exception: # noqa # pragma: no cover # pylint: disable=broad-except print( diff --git a/omnisafe/utils/plotter.py b/omnisafe/utils/plotter.py index f24a97bb4..29e22caa9 100644 --- a/omnisafe/utils/plotter.py +++ b/omnisafe/utils/plotter.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -164,7 +164,13 @@ def plot_data( plt.tight_layout(pad=0.5) - def get_datasets(self, logdir: str, condition: str | None = None) -> list[DataFrame]: + def get_datasets( + self, + logdir: str, + condition: str | None = None, + reward_metrics: str = 'Metrics/EpRet', + cost_metrics: str = 'Metrics/EpCost', + ) -> list[DataFrame]: """Recursively look through logdir for files named "progress.txt". Assumes that any file "progress.txt" is a valid hit. @@ -172,9 +178,11 @@ def get_datasets(self, logdir: str, condition: str | None = None) -> list[DataFr Args: logdir (str): The directory to search for progress.txt files condition (str or None, optional): The condition label. Defaults to None. + reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'. + cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. Returns: - The datasets. + list[DataFrame]: A list of DataFrame objects containing the datasets. Raise: FileNotFoundError: If the config file is not found. 
@@ -204,21 +212,21 @@ def get_datasets(self, logdir: str, condition: str | None = None) -> list[DataFr self.units[condition1] += 1 try: exp_data = pd.read_csv(os.path.join(root, 'progress.csv')) - except FileNotFoundError as error: progress_path = os.path.join(root, 'progress.csv') raise FileNotFoundError(f'Could not read from {progress_path}') from error - performance = ( - 'Metrics/TestEpRet' if 'Metrics/TestEpRet' in exp_data else 'Metrics/EpRet' - ) - cost_performance = ( - 'Metrics/TestEpCost' if 'Metrics/TestEpCost' in exp_data else 'Metrics/EpCost' - ) + + if reward_metrics not in exp_data: + raise KeyError(f'{reward_metrics} is not in data to plot!') + + if cost_metrics not in exp_data: + raise KeyError(f'{cost_metrics} is not in data to plot!') + exp_data.insert(len(exp_data.columns), 'Unit', unit) exp_data.insert(len(exp_data.columns), 'Condition1', condition1) exp_data.insert(len(exp_data.columns), 'Condition2', condition2) - exp_data.insert(len(exp_data.columns), 'Rewards', exp_data[performance]) - exp_data.insert(len(exp_data.columns), 'Costs', exp_data[cost_performance]) + exp_data.insert(len(exp_data.columns), 'Rewards', exp_data[reward_metrics]) + exp_data.insert(len(exp_data.columns), 'Costs', exp_data[cost_metrics]) epoch = exp_data.get('Train/Epoch') if epoch is None or steps_per_epoch is None: raise ValueError('No Train/Epoch column in progress.csv') @@ -236,6 +244,8 @@ def get_all_datasets( legend: list[str] | None = None, select: str | None = None, exclude: str | None = None, + reward_metrics: str = 'Metrics/EpCost', + cost_metrics: str = 'Metrics/EpCost', ) -> list[DataFrame]: """Get all the data from all the log directories. @@ -248,6 +258,8 @@ def get_all_datasets( legend (list of str or None, optional): List of legend names. Defaults to None. select (str or None, optional): Select logdirs that contain this string. Defaults to None. exclude (str or None, optional): Exclude logdirs that contain this string. Defaults to None. 
+ reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpReward'. + cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. Returns: All the data stored in a list of DataFrames. @@ -285,13 +297,22 @@ def get_all_datasets( data = [] if legend: for log, leg in zip(logdirs, legend): - data += self.get_datasets(log, leg) + data += self.get_datasets( + log, + leg, + cost_metrics=cost_metrics, + reward_metrics=reward_metrics, + ) else: for log in logdirs: - data += self.get_datasets(log) + data += self.get_datasets( + log, + cost_metrics=cost_metrics, + reward_metrics=reward_metrics, + ) return data - # pylint: disable-next=too-many-arguments + # pylint: disable-next=too-many-arguments, too-many-locals def make_plots( self, all_logdirs: list[str], @@ -308,6 +329,8 @@ def make_plots( save_name: str | None = None, save_format: str = 'png', show_image: bool = False, + reward_metrics: str = 'Metrics/EpCost', + cost_metrics: str = 'Metrics/EpCost', ) -> None: """Make plots from the data in the specified log directories. @@ -355,9 +378,18 @@ def make_plots( to ``png``. show_image (bool, optional): Optional flag. If set, the plot will be displayed on screen. Defaults to ``False``. + reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpReward'. + cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. """ assert xaxis is not None, 'Must specify xaxis' - data = self.get_all_datasets(all_logdirs, legend, select, exclude) + data = self.get_all_datasets( + all_logdirs, + legend, + select, + exclude, + cost_metrics=cost_metrics, + reward_metrics=reward_metrics, + ) condition = 'Condition2' if count else 'Condition1' # choose what to show on main curve: mean? max? min? 
estimator = getattr(np, estimator) From 7423bc16ad7dc2d225c79865a9506b04885f894c Mon Sep 17 00:00:00 2001 From: Gaiejj Date: Wed, 19 Jun 2024 20:15:13 +0800 Subject: [PATCH 12/18] fix: fix cuda error --- omnisafe/adapter/beta_barrier_function_adapter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/omnisafe/adapter/beta_barrier_function_adapter.py b/omnisafe/adapter/beta_barrier_function_adapter.py index 9364b5282..1ab488d88 100644 --- a/omnisafe/adapter/beta_barrier_function_adapter.py +++ b/omnisafe/adapter/beta_barrier_function_adapter.py @@ -96,6 +96,7 @@ def vectorized_f_(obs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: Returns: tuple: Two torch tensors representing the lower and upper bounds for each observation in the batch. """ + device = obs.device obs = obs.cpu().detach().numpy() batch_size = obs.shape[0] @@ -104,8 +105,8 @@ def vectorized_f_(obs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: for i in range(batch_size): lbs[i], ubs[i] = f(obs[i]) - lbs = torch.FloatTensor(lbs).reshape(batch_size, 1) - ubs = torch.FloatTensor(ubs).reshape(batch_size, 1) + lbs = torch.FloatTensor(lbs).reshape(batch_size, 1).to(device) + ubs = torch.FloatTensor(ubs).reshape(batch_size, 1).to(device) return lbs, ubs From 39341928a818ea2af67975590a97df24be256558 Mon Sep 17 00:00:00 2001 From: Gaiejj Date: Wed, 19 Jun 2024 20:16:35 +0800 Subject: [PATCH 13/18] fix: fix compensator saving --- omnisafe/algorithms/off_policy/ddpg_cbf.py | 13 +++++++++--- .../on_policy/barrier_function/trpo_cbf.py | 21 +++++++++++++++++-- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/omnisafe/algorithms/off_policy/ddpg_cbf.py b/omnisafe/algorithms/off_policy/ddpg_cbf.py index 17dcacda3..f69310fff 100644 --- a/omnisafe/algorithms/off_policy/ddpg_cbf.py +++ b/omnisafe/algorithms/off_policy/ddpg_cbf.py @@ -51,7 +51,7 @@ def _init_env(self) -> None: self._seed, self._cfgs, ) - solver = PendulumSolver(device=self._cfgs.train_cfgs.device) + solver = 
PendulumSolver(device=self._device) compensator = BarrierCompensator( obs_dim=self._env.observation_space.shape[0], act_dim=self._env.action_space.shape[0], @@ -120,11 +120,18 @@ def _specific_save(self) -> None: os.makedirs(os.path.dirname(path), exist_ok=True) joblib.dump(self._env.gp_models, path) - def _log_what_to_save(self) -> dict[str, Any]: - """Define what need to be saved below.""" + def _setup_torch_saver(self) -> None: + """Define what need to be saved below. + + OmniSafe's main storage interface is based on PyTorch. If you need to save models in other + formats, please use :meth:`_specific_save`. + """ what_to_save: dict[str, Any] = {} what_to_save['pi'] = self._actor_critic.actor what_to_save['compensator'] = self._env.compensator + if self._cfgs.algo_cfgs.obs_normalize: + obs_normalizer = self._env.save()['obs_normalizer'] + what_to_save['obs_normalizer'] = obs_normalizer self._logger.setup_torch_saver(what_to_save) diff --git a/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py b/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py index 8125151d6..b0b64f892 100644 --- a/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py +++ b/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py @@ -28,6 +28,7 @@ from omnisafe.algorithms.on_policy.base.trpo import TRPO from omnisafe.common.barrier_comp import BarrierCompensator from omnisafe.common.barrier_solver import PendulumSolver +from omnisafe.typing import Any from omnisafe.utils import distributed from omnisafe.utils.distributed import get_rank @@ -70,12 +71,12 @@ def _init_env(self) -> None: // distributed.world_size() // self._cfgs.train_cfgs.vector_env_nums ) - self.solver = PendulumSolver(device=self._cfgs.train_cfgs.device) + self.solver = PendulumSolver(device=self._device) self.compensator = BarrierCompensator( obs_dim=self._env.observation_space.shape[0], act_dim=self._env.action_space.shape[0], cfgs=self._cfgs.compensator_cfgs, - ) + ).to(self._device) 
self._env.set_solver(solver=self.solver) self._env.set_compensator(compensator=self.compensator) @@ -165,3 +166,19 @@ def _specific_save(self) -> None: ) os.makedirs(os.path.dirname(path), exist_ok=True) joblib.dump(self._env.gp_models, path) + + def _setup_torch_saver(self) -> None: + """Define what need to be saved below. + + OmniSafe's main storage interface is based on PyTorch. If you need to save models in other + formats, please use :meth:`_specific_save`. + """ + what_to_save: dict[str, Any] = {} + + what_to_save['pi'] = self._actor_critic.actor + what_to_save['compensator'] = self._env.compensator + if self._cfgs.algo_cfgs.obs_normalize: + obs_normalizer = self._env.save()['obs_normalizer'] + what_to_save['obs_normalizer'] = obs_normalizer + + self._logger.setup_torch_saver(what_to_save) From 9809987454f81c188af301de880cd993d47f40ed Mon Sep 17 00:00:00 2001 From: Gaiejj Date: Wed, 19 Jun 2024 20:28:35 +0800 Subject: [PATCH 14/18] style: fix spelling --- omnisafe/common/experiment_grid.py | 2 +- omnisafe/common/statistics_tools.py | 2 +- omnisafe/utils/plotter.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/omnisafe/common/experiment_grid.py b/omnisafe/common/experiment_grid.py index c47e5312a..787f4592f 100644 --- a/omnisafe/common/experiment_grid.py +++ b/omnisafe/common/experiment_grid.py @@ -561,7 +561,7 @@ def analyze( cost_limit (float or None, optional): Value for one line showed on graph to indicate cost. Defaults to None. show_image (bool): Whether to show graph image in GUI windows. - reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpReward'. + reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'. cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. """ assert self._statistical_tools is not None, 'Please run run() first!' 
diff --git a/omnisafe/common/statistics_tools.py b/omnisafe/common/statistics_tools.py index d2082918f..72e661c33 100644 --- a/omnisafe/common/statistics_tools.py +++ b/omnisafe/common/statistics_tools.py @@ -105,7 +105,7 @@ def draw_graph( cost_limit (float or None, optional): The cost limit of the experiment. Defaults to None. smooth (int, optional): The smooth window size. Defaults to 1. show_image (bool): Whether to show graph image in GUI windows. - reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpReward'. + reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'. cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. .. note:: diff --git a/omnisafe/utils/plotter.py b/omnisafe/utils/plotter.py index 29e22caa9..e592240be 100644 --- a/omnisafe/utils/plotter.py +++ b/omnisafe/utils/plotter.py @@ -168,7 +168,7 @@ def get_datasets( self, logdir: str, condition: str | None = None, - reward_metrics: str = 'Metrics/EpReward', + reward_metrics: str = 'Metrics/EpRet', cost_metrics: str = 'Metrics/EpCost', ) -> list[DataFrame]: """Recursively look through logdir for files named "progress.txt". @@ -178,7 +178,7 @@ def get_datasets( Args: logdir (str): The directory to search for progress.txt files condition (str or None, optional): The condition label. Defaults to None. - reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpReward'. + reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'. cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. Returns: @@ -258,7 +258,7 @@ def get_all_datasets( legend (list of str or None, optional): List of legend names. Defaults to None. select (str or None, optional): Select logdirs that contain this string. Defaults to None. exclude (str or None, optional): Exclude logdirs that contain this string. 
Defaults to None. - reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpReward'. + reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'. cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. Returns: @@ -378,7 +378,7 @@ def make_plots( to ``png``. show_image (bool, optional): Optional flag. If set, the plot will be displayed on screen. Defaults to ``False``. - reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpReward'. + reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'. cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. """ assert xaxis is not None, 'Must specify xaxis' From 354dfa1878551e938f32e3010a051a1a8a66b5f6 Mon Sep 17 00:00:00 2001 From: Gaiejj Date: Thu, 4 Jul 2024 19:38:08 +0800 Subject: [PATCH 15/18] refactor(cbf): refactor CBF methods --- .pre-commit-config.yaml | 4 +- omnisafe/adapter/barrier_function_adapter.py | 155 ++++++++----- .../offpolicy_barrier_function_adapter.py | 85 ++++--- .../robust_barrier_function_adapter.py | 6 +- omnisafe/algorithms/off_policy/ddpg_cbf.py | 48 +--- omnisafe/algorithms/off_policy/sac_rcbf.py | 17 +- .../on_policy/barrier_function/trpo_cbf.py | 99 +------- omnisafe/common/__init__.py | 3 + omnisafe/common/barrier_comp.py | 6 +- omnisafe/common/barrier_solver.py | 192 +++------------ omnisafe/common/gp_model.py | 218 ++++++++++++++++++ omnisafe/common/robust_gp_model.py | 12 +- omnisafe/evaluator.py | 75 +++--- pyproject.toml | 1 - 14 files changed, 482 insertions(+), 439 deletions(-) create mode 100644 omnisafe/common/gp_model.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 51cbb81c1..2f04378f6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: - id: debug-statements - id: double-quote-string-fixer - repo: 
https://github.com/charliermarsh/ruff-pre-commit - rev: v0.4.4 + rev: v0.5.0 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] @@ -54,7 +54,7 @@ repos: - id: pyupgrade args: [--py38-plus] # sync with requires-python - repo: https://github.com/pycqa/flake8 - rev: 7.0.0 + rev: 7.1.0 hooks: - id: flake8 additional_dependencies: diff --git a/omnisafe/adapter/barrier_function_adapter.py b/omnisafe/adapter/barrier_function_adapter.py index c5581400a..c247f7705 100644 --- a/omnisafe/adapter/barrier_function_adapter.py +++ b/omnisafe/adapter/barrier_function_adapter.py @@ -16,6 +16,8 @@ from __future__ import annotations +from typing import Any + import torch from rich.progress import track from sklearn.gaussian_process import GaussianProcessRegressor @@ -24,6 +26,7 @@ from omnisafe.common.barrier_comp import BarrierCompensator from omnisafe.common.barrier_solver import PendulumSolver from omnisafe.common.buffer import VectorOnPolicyBuffer +from omnisafe.common.gp_model import DynamicsModel from omnisafe.common.logger import Logger from omnisafe.envs.wrapper import AutoReset, CostNormalize, RewardNormalize, TimeLimit, Unsqueeze from omnisafe.models.actor_critic.constraint_actor_critic import ConstraintActorCritic @@ -47,9 +50,29 @@ class BarrierFunctionAdapter(OnPolicyAdapter): def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: """Initialize an instance of :class:`BarrierFunctionAdapter`.""" super().__init__(env_id, num_envs, seed, cfgs) - self.solver: PendulumSolver - self.compensator: BarrierCompensator - self.first_iter = 1 + + if env_id == 'Pendulum-v1': + self.solver: PendulumSolver = PendulumSolver( + action_size=self.action_space.shape[0], # type: ignore + device=self._device, + ) + self.dynamics_model: DynamicsModel = DynamicsModel( + observation_size=self.observation_space.shape[0], # type: ignore + ) + else: + raise NotImplementedError(f'Please implement solver for {env_id} !') + self.compensator: BarrierCompensator = 
BarrierCompensator( + obs_dim=self.observation_space.shape[0], # type: ignore + act_dim=self.action_space.shape[0], # type: ignore + cfgs=cfgs.compensator_cfgs, + ).to(self._device) + self.first_iter: bool = True + + self.episode_rollout: dict[str, Any] = {} + self.episode_rollout['obs'] = [] + self.episode_rollout['final_act'] = [] + self.episode_rollout['approx_compensating_act'] = [] + self.episode_rollout['compensating_act'] = [] def _wrapper( self, @@ -89,17 +112,9 @@ def _wrapper( if self._env.num_envs == 1: self._env = Unsqueeze(self._env, device=self._device) - def set_solver(self, solver: PendulumSolver) -> None: - """Set the barrier function solver for Pendulum environment.""" - self.solver = solver - - def set_compensator(self, compensator: BarrierCompensator) -> None: - """Set the action compensator.""" - self.compensator = compensator - def reset_gp_model(self) -> None: """Reset the gaussian processing model of barrier function solver.""" - self.solver.reset_gp_model() + self.dynamics_model.reset_gp_model() def rollout( # pylint: disable=too-many-locals,too-many-branches self, @@ -118,12 +133,10 @@ def rollout( # pylint: disable=too-many-locals,too-many-branches logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``. 
""" self._reset_log() - if not self.first_iter: - self.reset_gp_model() obs, _ = self.reset() - path_obs = [] - path_act = [] + self.episode_rollout['obs'] = [] + self.episode_rollout['final_act'] = [] for step in track( range(steps_per_epoch), description=f'Processing rollout for epoch: {logger.current_epoch}...', @@ -134,46 +147,29 @@ def rollout( # pylint: disable=too-many-locals,too-many-branches act_dist = agent.actor(obs) act_mean, act_std = act_dist.mean, agent.actor.std - approx_compensating_act = self.compensator(obs=obs) - compensated_act_mean_raw = act_mean + approx_compensating_act - - if self.first_iter: - [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=False) - else: - [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=True) - - compensating_act = self.solver.control_barrier( - compensated_act_mean_raw, - f, - g, - x, - std, + safe_act = self.get_safe_action( + obs, + act_mean, + act_std, ) + logp = agent.actor.log_prob(safe_act) - compensated_act_mean = compensated_act_mean_raw + compensating_act - final_act = torch.normal(compensated_act_mean, act_std) - - logp = agent.actor.log_prob(final_act) - - path_obs.append(obs) - path_act.append(final_act) - - next_obs, reward, cost, terminated, truncated, info = self.step(final_act) + self.episode_rollout['obs'].append(obs) + self.episode_rollout['final_act'].append(safe_act) + next_obs, reward, cost, terminated, truncated, info = self.step(safe_act) self._log_value(reward=reward, cost=cost, info=info) logger.store({'Value/reward': value_r}) buffer.store( obs=obs, - act=final_act, + act=safe_act, reward=reward, cost=cost, value_r=value_r, value_c=value_c, logp=logp, - approx_compensating_act=approx_compensating_act.detach(), - compensating_act=compensating_act.detach(), ) obs = next_obs @@ -203,25 +199,72 @@ def rollout( # pylint: disable=too-many-locals,too-many-branches if done or time_out: self._log_metrics(logger, idx) + compensator_loss = self.compensator.update( + 
torch.cat(self.episode_rollout['obs']), + torch.cat(self.episode_rollout['approx_compensating_act']), + torch.cat(self.episode_rollout['compensating_act']), + ) + logger.store({'Value/Loss_compensator': compensator_loss.item()}) + self.dynamics_model.update_gp_dynamics( + obs=torch.cat(self.episode_rollout['obs']), # type: ignore + act=torch.cat(self.episode_rollout['final_act']), # type: ignore + ) + + self.episode_rollout['obs'] = [] + self.episode_rollout['final_act'] = [] + self.episode_rollout['approx_compensating_act'] = [] + self.episode_rollout['compensating_act'] = [] + self._reset_log(idx) + obs, _ = self.reset() + buffer.finish_path(last_value_r, last_value_c, idx) + self.first_iter = False + self.reset_gp_model() - self._ep_ret[idx] = 0.0 - self._ep_cost[idx] = 0.0 - self._ep_len[idx] = 0.0 + def get_safe_action( + self, + obs: torch.Tensor, + act_mean: torch.Tensor, + act_std: torch.Tensor, + ) -> torch.Tensor: + """Computes a safe action by applying compensatory actions. - if step < self._cfgs.algo_cfgs.update_dynamics_steps: - self.solver.update_gp_dynamics( - obs=torch.cat(path_obs), # type: ignore - act=torch.cat(path_act), # type: ignore - ) + .. note:: + This is the core method of the CBF method. Users can modify this function to implement + customized action mapping. - path_obs = [] - path_act = [] - obs, _ = self.reset() - buffer.finish_path(last_value_r, last_value_c, idx) - self.first_iter = 0 + Args: + obs (torch.Tensor): The current observation from the environment. + act_mean (torch.Tensor): The mean of proposed action to be controlled for safety. + act_std (torch.Tensor): The standard deviation of proposed action to be controlled for safety. + + Returns: + list(torch.Tensor): The safe actions for interaction and compensating actions for compensator training. 
+ """ + with torch.no_grad(): + approx_compensating_act = self.compensator(obs=obs) + compensated_act_mean_raw = act_mean + approx_compensating_act + + [f, g, x, std] = self.dynamics_model.get_gp_dynamics( + obs, + use_prev_model=not self.first_iter, + ) + compensating_act = self.solver.control_barrier( + original_action=compensated_act_mean_raw, + f=f, + g=g, + x=x, + std=std, + ) + + compensated_act_mean = compensated_act_mean_raw + compensating_act + safe_act = torch.normal(compensated_act_mean, act_std) + self.episode_rollout['compensating_act'].append(compensating_act) + self.episode_rollout['approx_compensating_act'].append(approx_compensating_act) + + return safe_act @property def gp_models(self) -> list[GaussianProcessRegressor]: """Return the gp models to be saved.""" - return self.solver.gp_models + return self.dynamics_model.gp_models diff --git a/omnisafe/adapter/offpolicy_barrier_function_adapter.py b/omnisafe/adapter/offpolicy_barrier_function_adapter.py index e6bff40d6..20b4abdb8 100644 --- a/omnisafe/adapter/offpolicy_barrier_function_adapter.py +++ b/omnisafe/adapter/offpolicy_barrier_function_adapter.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ from omnisafe.common.barrier_comp import BarrierCompensator from omnisafe.common.barrier_solver import PendulumSolver from omnisafe.common.buffer import VectorOffPolicyBuffer +from omnisafe.common.gp_model import DynamicsModel from omnisafe.common.logger import Logger from omnisafe.envs.wrapper import CostNormalize, RewardNormalize, Unsqueeze from omnisafe.models.actor_critic.constraint_actor_q_critic import ConstraintActorQCritic @@ -34,21 +35,47 @@ class OffPolicyBarrierFunctionAdapter(OffPolicyAdapter): """OffPolicy Barrier Function Adapter for OmniSafe. 
- :class:`OffPolicyBarrierFunctionAdapter` is used to adapt the environment with CBF controller. + :class:`OffPolicyBarrierFunctionAdapter` is used to adapt the environment with a CBF controller, + mapping the agent actions from unsafe ones to safe ones. Args: env_id (str): The environment id. num_envs (int): The number of environments. seed (int): The random seed. cfgs (Config): The configuration. + + Attributes: + solver (PendulumSolver): The solver used for the environment, currently supporting + ``Pendulum-v1``. + dynamics_model (DynamicsModel): The dynamics model used to predict the environment's behavior. + compensator (BarrierCompensator): The compensator used to approximate previous actions. + first_iter (bool): A flag indicating if it is the first iteration. + episode_rollout (dict[str, Any]): A dictionary to store the episode rollout information, + including observations and various actions, + useful for updating compensator. """ def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: - """Initialize an instance of :class:`BarrierFunctionAdapter`.""" + """Initialize an instance of :class:`OffPolicyBarrierFunctionAdapter`.""" super().__init__(env_id, num_envs, seed, cfgs) - self.solver: PendulumSolver - self.compensator: BarrierCompensator - self.first_iter: int = 1 + + if env_id == 'Pendulum-v1': + self.solver: PendulumSolver = PendulumSolver( + action_size=self.action_space.shape[0], # type: ignore + device=self._device, + ) + self.dynamics_model: DynamicsModel = DynamicsModel( + observation_size=self.observation_space.shape[0], # type: ignore + ) + else: + raise NotImplementedError(f'Please implement solver for {env_id} !') + self.compensator: BarrierCompensator = BarrierCompensator( + obs_dim=self.observation_space.shape[0], # type: ignore + act_dim=self.action_space.shape[0], # type: ignore + cfgs=cfgs.compensator_cfgs, + ).to(self._device) + + self.first_iter: bool = True self.episode_rollout: dict[str, Any] = {} 
self.episode_rollout['obs'] = [] self.episode_rollout['final_act'] = [] @@ -110,17 +137,9 @@ def eval_policy( # pylint: disable=too-many-locals }, ) - def set_solver(self, solver: PendulumSolver) -> None: - """Set the barrier function solver for Pendulum environment.""" - self.solver = solver - - def set_compensator(self, compensator: BarrierCompensator) -> None: - """Set the action compensator.""" - self.compensator = compensator - def reset_gp_model(self) -> None: """Reset the gaussian processing model of barrier function solver.""" - self.solver.reset_gp_model() + self.dynamics_model.reset_gp_model() def rollout( # pylint: disable=too-many-locals self, @@ -130,7 +149,7 @@ def rollout( # pylint: disable=too-many-locals logger: Logger, use_rand_action: bool, ) -> None: - """Rollout in off-policy manner with barrier function controller. + """Rollout in off-policy manner with the ``dynamics_model``, ``solver`` and ``compensator``. Args: rollout_step (int): Number of rollout steps. @@ -173,7 +192,7 @@ def rollout( # pylint: disable=too-many-locals torch.cat(self.episode_rollout['compensating_act']), ) logger.store({'Value/Loss_compensator': compensator_loss.item()}) - self.solver.update_gp_dynamics( + self.dynamics_model.update_gp_dynamics( obs=torch.cat(self.episode_rollout['obs']), # type: ignore act=torch.cat(self.episode_rollout['final_act']), # type: ignore ) @@ -185,9 +204,8 @@ def rollout( # pylint: disable=too-many-locals self._reset_log(idx) self._current_obs, _ = self._env.reset() - self.first_iter = 0 - if not self.first_iter: - self.reset_gp_model() + self.first_iter = False + self.reset_gp_model() def get_safe_action( self, @@ -197,24 +215,33 @@ def get_safe_action( ) -> torch.Tensor: """Computes a safe action by applying compensatory actions. + .. note:: + This is the core method of the CBF method. Users can modify this function to implement + customized action mapping. + Args: obs (torch.Tensor): The current observation from the environment. 
- act (torch.Tensor): The proposed action to be evaluated for safety. + act (torch.Tensor): The proposed action to be controlled for safety. is_eval (bool, optional): A flag to indicate whether this is an evaluation phase, defaulting to False. Returns: torch.Tensor: The safe action to be executed in the environment. """ with torch.no_grad(): - approx_compensating_act = self.compensator(obs=self._current_obs) + approx_compensating_act = self.compensator(obs=obs) compensated_act_mean_raw = act + approx_compensating_act - if self.first_iter: - [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=False) - else: - [f, g, x, std] = self.solver.get_gp_dynamics(obs, use_prev_model=True) - - compensating_act = self.solver.control_barrier(compensated_act_mean_raw, f, g, x, std) + [f, g, x, std] = self.dynamics_model.get_gp_dynamics( + obs, + use_prev_model=not self.first_iter, + ) + compensating_act = self.solver.control_barrier( + original_action=compensated_act_mean_raw, + f=f, + g=g, + x=x, + std=std, + ) safe_act = compensated_act_mean_raw + compensating_act if not is_eval: @@ -226,4 +253,4 @@ def get_safe_action( @property def gp_models(self) -> list[GaussianProcessRegressor]: """Return the gp models to be saved.""" - return self.solver.gp_models + return self.dynamics_model.gp_models diff --git a/omnisafe/adapter/robust_barrier_function_adapter.py b/omnisafe/adapter/robust_barrier_function_adapter.py index ade39d12f..cc5a22e02 100644 --- a/omnisafe/adapter/robust_barrier_function_adapter.py +++ b/omnisafe/adapter/robust_barrier_function_adapter.py @@ -60,9 +60,9 @@ def _wrapper( """Wrapper the environment. .. warning:: - Since solving the optimization problem requires obtaining physical quantities with practical - significance from state observations, the Barrier Function Adapter does not support - normalization of observations. 
+ Since solving the optimization problem requires obtaining physical quantities with + practical significance from state observations, the Barrier Function Adapter does not + support normalization of observations. Args: obs_normalize (bool, optional): Whether to normalize the observation. Defaults to False. diff --git a/omnisafe/algorithms/off_policy/ddpg_cbf.py b/omnisafe/algorithms/off_policy/ddpg_cbf.py index f69310fff..6df1fcbb3 100644 --- a/omnisafe/algorithms/off_policy/ddpg_cbf.py +++ b/omnisafe/algorithms/off_policy/ddpg_cbf.py @@ -21,13 +21,10 @@ import os import joblib -import torch from omnisafe.adapter.offpolicy_barrier_function_adapter import OffPolicyBarrierFunctionAdapter from omnisafe.algorithms import registry from omnisafe.algorithms.off_policy.ddpg import DDPG -from omnisafe.common.barrier_comp import BarrierCompensator -from omnisafe.common.barrier_solver import PendulumSolver from omnisafe.typing import Any from omnisafe.utils.distributed import get_rank @@ -45,56 +42,13 @@ class DDPGCBF(DDPG): """ def _init_env(self) -> None: + super()._init_env() self._env: OffPolicyBarrierFunctionAdapter = OffPolicyBarrierFunctionAdapter( self._env_id, self._cfgs.train_cfgs.vector_env_nums, self._seed, self._cfgs, ) - solver = PendulumSolver(device=self._device) - compensator = BarrierCompensator( - obs_dim=self._env.observation_space.shape[0], - act_dim=self._env.action_space.shape[0], - cfgs=self._cfgs.compensator_cfgs, - ).to(self._device) - - self._env.set_compensator(compensator=compensator) - self._env.set_solver(solver=solver) - - assert ( - self._cfgs.algo_cfgs.steps_per_epoch % self._cfgs.train_cfgs.vector_env_nums == 0 - ), 'The number of steps per epoch is not divisible by the number of environments.' - - assert ( - int(self._cfgs.train_cfgs.total_steps) % self._cfgs.algo_cfgs.steps_per_epoch == 0 - ), 'The total number of steps is not divisible by the number of steps per epoch.' 
- self._epochs: int = int( - self._cfgs.train_cfgs.total_steps // self._cfgs.algo_cfgs.steps_per_epoch, - ) - self._epoch: int = 0 - self._steps_per_epoch: int = ( - self._cfgs.algo_cfgs.steps_per_epoch // self._cfgs.train_cfgs.vector_env_nums - ) - - self._update_cycle: int = self._cfgs.algo_cfgs.update_cycle - assert ( - self._steps_per_epoch % self._update_cycle == 0 - ), 'The number of steps per epoch is not divisible by the number of steps per sample.' - self._samples_per_epoch: int = self._steps_per_epoch // self._update_cycle - self._update_count: int = 0 - - def _init(self) -> None: - super()._init() - self._buf.add_field( - name='approx_compensating_act', - shape=self._env.action_space.shape, - dtype=torch.float32, - ) - self._buf.add_field( - name='compensating_act', - shape=self._env.action_space.shape, - dtype=torch.float32, - ) def _init_log(self) -> None: """Log the DDPGCBF specific information. diff --git a/omnisafe/algorithms/off_policy/sac_rcbf.py b/omnisafe/algorithms/off_policy/sac_rcbf.py index e6c020770..598c4a14c 100644 --- a/omnisafe/algorithms/off_policy/sac_rcbf.py +++ b/omnisafe/algorithms/off_policy/sac_rcbf.py @@ -49,13 +49,16 @@ def _init_env(self) -> None: self._seed, self._cfgs, ) - solver = CBFQPLayer( - env=self._env, - device=self._cfgs.train_cfgs.device, - gamma_b=self._cfgs.cbf_cfgs.gamma_b, - l_p=self._cfgs.cbf_cfgs.l_p, - ) - dynamics_model = DynamicsModel(env=self._env) + if self._env_id == 'Unicycle': + solver = CBFQPLayer( + env=self._env, + device=self._cfgs.train_cfgs.device, + gamma_b=self._cfgs.cbf_cfgs.gamma_b, + l_p=self._cfgs.cbf_cfgs.l_p, + ) + dynamics_model = DynamicsModel(env=self._env) + else: + raise NotImplementedError(f'Please implement solver for {self._env_id} !') self._env.set_dynamics_model(dynamics_model=dynamics_model) self._env.set_solver(solver=solver) diff --git a/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py b/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py index 
b0b64f892..0324170c4 100644 --- a/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py +++ b/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,16 +20,11 @@ import os import joblib -import torch -from torch.utils.data import DataLoader, TensorDataset from omnisafe.adapter.barrier_function_adapter import BarrierFunctionAdapter from omnisafe.algorithms import registry from omnisafe.algorithms.on_policy.base.trpo import TRPO -from omnisafe.common.barrier_comp import BarrierCompensator -from omnisafe.common.barrier_solver import PendulumSolver from omnisafe.typing import Any -from omnisafe.utils import distributed from omnisafe.utils.distributed import get_rank @@ -57,103 +52,13 @@ def _init_log(self) -> None: self._logger.register_key('Value/Loss_compensator') def _init_env(self) -> None: + super()._init_env() self._env: BarrierFunctionAdapter = BarrierFunctionAdapter( self._env_id, self._cfgs.train_cfgs.vector_env_nums, self._seed, self._cfgs, ) - assert (self._cfgs.algo_cfgs.steps_per_epoch) % ( - distributed.world_size() * self._cfgs.train_cfgs.vector_env_nums - ) == 0, 'The number of steps per epoch is not divisible by the number of environments.' 
- self._steps_per_epoch: int = ( - self._cfgs.algo_cfgs.steps_per_epoch - // distributed.world_size() - // self._cfgs.train_cfgs.vector_env_nums - ) - self.solver = PendulumSolver(device=self._device) - self.compensator = BarrierCompensator( - obs_dim=self._env.observation_space.shape[0], - act_dim=self._env.action_space.shape[0], - cfgs=self._cfgs.compensator_cfgs, - ).to(self._device) - self._env.set_solver(solver=self.solver) - self._env.set_compensator(compensator=self.compensator) - - def _init(self) -> None: - super()._init() - self._buf.add_field( - name='approx_compensating_act', - shape=self._env.action_space.shape, - dtype=torch.float32, - ) - self._buf.add_field( - name='compensating_act', - shape=self._env.action_space.shape, - dtype=torch.float32, - ) - - def _update(self) -> None: - """Update actor, critic. - - .. hint:: - Here are some differences between NPG and Policy Gradient (PG): In PG, the actor network - and the critic network are updated together. When the KL divergence between the old - policy, and the new policy is larger than a threshold, the update is rejected together. - - In NPG, the actor network and the critic network are updated separately. When the KL - divergence between the old policy, and the new policy is larger than a threshold, the - update of the actor network is rejected, but the update of the critic network is still - accepted. 
- """ - data = self._buf.get() - - ( - obs, - act, - logp, - target_value_r, - adv_r, - adv_c, - approx_compensating_act, - compensating_act, - ) = ( - data['obs'], - data['act'], - data['logp'], - data['target_value_r'], - data['adv_r'], - data['adv_c'], - data['approx_compensating_act'], - data['compensating_act'], - ) - - self._update_actor(obs, act, logp, adv_r, adv_c) - compensator_loss = self._env.compensator.update( - observation=obs, - approx_compensating_act=approx_compensating_act, - compensating_act=compensating_act, - ) - dataloader = DataLoader( - dataset=TensorDataset(obs, target_value_r), - batch_size=self._cfgs.algo_cfgs.batch_size, - shuffle=True, - ) - - for _ in range(self._cfgs.algo_cfgs.update_iters): - for ( - obs, - target_value_r, - ) in dataloader: - self._update_reward_critic(obs, target_value_r) - - self._logger.store( - { - 'Train/StopIter': self._cfgs.algo_cfgs.update_iters, - 'Value/Adv': adv_r.mean().item(), - 'Value/Loss_compensator': compensator_loss.item(), - }, - ) def _specific_save(self) -> None: """Save some algorithms specific models per epoch.""" diff --git a/omnisafe/common/__init__.py b/omnisafe/common/__init__.py index 9e4fc1bf1..c1311f150 100644 --- a/omnisafe/common/__init__.py +++ b/omnisafe/common/__init__.py @@ -14,6 +14,9 @@ # ============================================================================== """Common Common utilities for OmniSafe.""" +from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.common.barrier_solver import PendulumSolver +from omnisafe.common.gp_model import DynamicsModel from omnisafe.common.lagrange import Lagrange from omnisafe.common.logger import Logger from omnisafe.common.normalizer import Normalizer diff --git a/omnisafe/common/barrier_comp.py b/omnisafe/common/barrier_comp.py index 891932188..64d1af104 100644 --- a/omnisafe/common/barrier_comp.py +++ b/omnisafe/common/barrier_comp.py @@ -27,9 +27,9 @@ class BarrierCompensator(torch.nn.Module): """A module that 
represents a barrier compensator using a multi-layer perceptron (MLP) network. - This module is designed to compute actions based on observations, with the intention of compensating for - potential barriers in a control system or a similar application. It is built upon a configurable MLP network - and trained using an optimization routine. + This module is designed to compute actions based on observations, with the intention of + compensating for potential barriers in a control system or a similar application. It is built + upon a configurable MLP network and trained using an optimization routine. Attributes: obs_dim (int): Dimension of the observation space. diff --git a/omnisafe/common/barrier_solver.py b/omnisafe/common/barrier_solver.py index 35221281e..f281fd0e6 100644 --- a/omnisafe/common/barrier_solver.py +++ b/omnisafe/common/barrier_solver.py @@ -22,73 +22,70 @@ import warnings -import joblib import numpy as np import torch from cvxopt import matrix, solvers -from sklearn.gaussian_process import GaussianProcessRegressor -from sklearn.gaussian_process.kernels import RBF -from sklearn.gaussian_process.kernels import ConstantKernel as C + +from omnisafe.typing import DEVICE_CPU # pylint: disable-next=too-many-instance-attributes class PendulumSolver: - """Solver for the pendulum problem using Gaussian Process models. + """The CBF solver for the pendulum problem using Gaussian Process models. + + This class implements a solver for the pendulum control problem using Control Barrier Functions + (CBFs). The primary goal is to ensure safe reinforcement learning by maintaining + safety constraints during the control process. + + For more details, please refer to: + + *End-to-End Safe Reinforcement Learning through Barrier Functions for Safety-Critical Continuous + Control Tasks* Attributes: - action_size (int): Size of the action space. - observation_size (int): Size of the observation space. - torque_bound (float): Maximum torque bound. 
- max_speed (float): Maximum speed of the pendulum. - device (str): Device to run the computations on. + action_size (int): Size of the action space, typically 1 for the pendulum. + torque_bound (float): Maximum torque bound that can be applied to the pendulum. + max_speed (float): Maximum speed (angular velocity) of the pendulum. + device (torch.device): Device to run the computations on. """ # pylint: disable-next=invalid-name def __init__( self, action_size: int = 1, - observation_size: int = 3, torque_bound: float = 15.0, max_speed: float = 60.0, - device: str = 'cpu', + device: torch.device = DEVICE_CPU, ) -> None: - """Initialize the PendulumSolver with specified parameters.""" + """Initialize the PendulumSolver with specified parameters. + + Args: + action_size (int): Size of the action space, typically 1 for the pendulum. + torque_bound (float): Maximum torque bound that can be applied to the pendulum. + max_speed (float): Maximum speed (angular velocity) of the pendulum. + device (torch.device): Device to run the computations on. + + Attributes: + F (float): A control gain factor used in the CBF computation. + _gamma_b (float): Parameter for the barrier function. + _kd (float): Damping coefficient used in the barrier function. 
+ """ self.action_size = action_size - self.observation_size = observation_size self.torque_bound = torque_bound self.max_speed = max_speed self.F = 1.0 self._device = device self._gamma_b = 0.5 self._kd = 1.5 - self.gp_model_prev: list[GaussianProcessRegressor, GaussianProcessRegressor] - self.gp_model: list[GaussianProcessRegressor, GaussianProcessRegressor] - self._build_barrier() - self.build_gp_model() warnings.filterwarnings('ignore') - def build_gp_model(self, save_dir: str | None = None) -> None: - """Build the Gaussian Process model.""" - gp_list = [] - noise = 0.01 - for _ in range(self.observation_size - 1): - if not save_dir: - kern = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) - gp = GaussianProcessRegressor(kernel=kern, alpha=noise, n_restarts_optimizer=10) - gp_list.append(gp) - else: - gp_list = joblib.load(save_dir) - self.gp_model = gp_list - self.gp_model_prev = gp_list.copy() - - @property - def gp_models(self) -> list[GaussianProcessRegressor]: - """Return all gaussian process regressor for saving.""" - return self.gp_model - def _build_barrier(self) -> None: - """Build the barrier for the pendulum solver.""" + """Construct the Control Barrier Function (CBF) for safe control of the pendulum. + + This method initializes and sets up the necessary components for the CBF, which is used to + ensure that the control actions taken do not violate safety constraints. + """ self.P = matrix(np.diag([1.0, 1e16]), tc='d') self.q = matrix(np.zeros(self.action_size + 1)) self.h1 = np.array([1, 0.01]) @@ -193,122 +190,3 @@ def control_barrier( # pylint: disable=invalid-name print('Error in QP') return torch.as_tensor(u_bar[0], dtype=torch.float32, device=self._device).unsqueeze(dim=0) - - # pylint: disable-next=attribute-defined-outside-init,import-outside-toplevel,invalid-name - def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: - """Calculate the dynamics of the system. 
- - Args: - obs (list[float]): The current observation of the system state. - original_action (float): The original action proposed by the RL algorithm. - - Returns: - np.ndarray: The calculated dynamics of the system. - """ - # time step - dt = 0.05 - # gravitational constant - G = 10 - # mass - m = 2 - # length - length = 2 - - # calculate the angle - theta = np.arctan2(obs[1], obs[0]) - # angular velocity - theta_dot = obs[2] - - # dynamics equations - f = np.array( - [ - -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2 - + theta_dot * dt - + theta - + 3 / (m * length**2) * original_action * dt**2, - theta_dot - - 3 * G / (2 * length) * np.sin(theta + np.pi) * dt - + 3 / (m * length**2) * original_action * dt, - ], - ) - - return np.squeeze(f) - - def update_gp_dynamics(self, obs: np.ndarray, act: np.ndarray) -> None: - """Update the Gaussian Process (GP) dynamics model based on observed states and actions. - - Args: - obs (np.ndarray): Observed states. - act (np.ndarray): Actions taken. - """ - obs = obs.detach().cpu().squeeze().numpy() - act = act.detach().cpu().squeeze().numpy() - N = self.observation_size - X = obs - U = act - L = len(X) - err = np.zeros((L - 1, N - 1)) - S = np.zeros((L - 1, 2)) - for i in range(L - 1): - f = self.get_dynamics(X[i], U[i]) - theta_p = np.arctan2(X[i][1], X[i][0]) - theta_dot_p = X[i][2] - theta = np.arctan2(X[i + 1][1], X[i + 1][0]) - theta_dot = X[i + 1][2] - S[i, :] = np.array([theta_p, theta_dot_p]) - err[i, :] = np.array([theta, theta_dot]) - f - self.gp_model[0].fit(S, err[:, 0]) - self.gp_model[1].fit(S, err[:, 1]) - - def get_gp_dynamics(self, obs: torch.Tensor, use_prev_model: bool) -> list[np.ndarray]: - """Retrieve the GP dynamics based on the current observation. - - Args: - obs (torch.Tensor): Current state observation. - - Returns: - list[np.ndarray]: list containing the gp dynamics [f, g, x, std]. 
- """ - obs = obs.cpu().detach().numpy() - u_rl = 0 - dt = 0.05 - G = 10 - m = 1 - length = 1 - obs = np.squeeze(obs) - theta = np.arctan2(obs[1], obs[0]) - theta_dot = obs[2] - x = np.array([theta, theta_dot]) - f_nom = np.array( - [ - -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2 - + theta_dot * dt - + theta - + 3 / (m * length**2) * u_rl * dt**2, - theta_dot - - 3 * G / (2 * length) * np.sin(theta + np.pi) * dt - + 3 / (m * length**2) * u_rl * dt, - ], - ) - g = np.array([3 / (m * length**2) * dt**2, 3 / (m * length**2) * dt]) - f_nom = np.squeeze(f_nom) - f = np.zeros(2) - if use_prev_model: - [m1, std1] = self.gp_model_prev[0].predict(x.reshape(1, -1), return_std=True) - [m2, std2] = self.gp_model_prev[1].predict(x.reshape(1, -1), return_std=True) - else: - [m1, std1] = self.gp_model[0].predict(x.reshape(1, -1), return_std=True) - [m2, std2] = self.gp_model[1].predict(x.reshape(1, -1), return_std=True) - f[0] = f_nom[0] + m1 - f[1] = f_nom[1] + m2 - return [ - np.squeeze(f), - np.squeeze(g), - np.squeeze(x), - np.array([np.squeeze(std1), np.squeeze(std2)]), - ] - - def reset_gp_model(self) -> None: - """Reset the gaussian process model of barrier function solver.""" - self.gp_model_prev = self.gp_model.copy() - self.build_gp_model() diff --git a/omnisafe/common/gp_model.py b/omnisafe/common/gp_model.py new file mode 100644 index 000000000..771b29731 --- /dev/null +++ b/omnisafe/common/gp_model.py @@ -0,0 +1,218 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementation of the Control Barrier Function Solver.""" + +# pylint: disable=invalid-name,wrong-spelling-in-docstring +# mypy: ignore-errors + + +from __future__ import annotations + +import joblib +import numpy as np +import torch +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RBF +from sklearn.gaussian_process.kernels import ConstantKernel as C + + +# pylint: disable-next=too-many-instance-attributes +class DynamicsModel: + """This class handles the creation and management of Gaussian Process (GP) models. + + These GP models predict the next state of the environment based on the current state. + + .. warning:: + This class provides an implementation for the ``Pendulum-v1`` environment. It needs to be + customized to extend it to more environments. + + Args: + observation_size (int): The size of the observation space. This determines + the number of GP models to create. + load_dir (Optional[str]): The directory to load the GP models from. If None, new models + are initialized. Default is None. + + Attributes: + observation_size (int): The size of the observation space. + gp_model_prev (List[GaussianProcessRegressor]): The GP models from the previous iteration. + gp_model (List[GaussianProcessRegressor]): The current GP models used for predictions. + """ + + def __init__(self, observation_size: int, load_dir: str | None = None) -> None: + """Initialize the DynamicsModel with a specified observation size and optional model loading. + + Args: + observation_size (int): Size of the observation space. + load_dir (Optional[str]): Directory to load the GP models from. If not provided, + new models will be created. 
+ """ + self.observation_size: int = observation_size + self.gp_model_prev: list[GaussianProcessRegressor] + self.gp_model: list[GaussianProcessRegressor] + self._build_gp_model(load_dir=load_dir) + + def _build_gp_model(self, load_dir: str | None = None) -> None: + """Build or load the Gaussian Process models. + + If a load directory is provided, the models are loaded from the specified directory. + Otherwise, new models are created with default parameters. + + Args: + load_dir (Optional[str]): Directory to load the GP models from. If None, new models + will be created. + """ + gp_list = [] + noise = 0.01 # Small noise term to stabilize the GP model + for _ in range(self.observation_size - 1): + if not load_dir: + # Define the kernel as a product of a constant kernel and an RBF kernel + kern = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) + # Initialize the GaussianProcessRegressor with the specified kernel and noise + gp = GaussianProcessRegressor(kernel=kern, alpha=noise, n_restarts_optimizer=10) + gp_list.append(gp) + else: + # Load the GP models from the specified directory + gp_list = joblib.load(load_dir) + self.gp_model = gp_list + self.gp_model_prev = gp_list.copy() + + @property + def gp_models(self) -> list[GaussianProcessRegressor]: + """Return all gaussian process regressor for saving.""" + return self.gp_model + + def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: + """Calculate the dynamics of the system based on the current observation and the original action. + + This method computes the next state of a pendulum system using the provided state and + action. The equations of motion for the pendulum are discretized using the Euler method. + + Args: + obs (list[float]): The current observation of the system state. + For the ``Pendulum-v1``, It should contain at least three elements: + [x, y, theta_dot], where x and y are the Cartesian coordinates of + the pendulum, and theta_dot is the angular velocity. 
+ original_action (float): The original action proposed by the RL agent. + + Returns: + np.ndarray: The calculated dynamics of the system, representing the next state. + """ + # Time step + dt = 0.05 + # Gravitational constant + G = 10 + # Mass of the pendulum + m = 2 + # Length of the pendulum + length = 2 + + # Calculate the angle theta from the Cartesian coordinates + theta = np.arctan2(obs[1], obs[0]) + # Angular velocity + theta_dot = obs[2] + + f = np.array( + [ + -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2 + + theta_dot * dt + + theta + + 3 / (m * length**2) * original_action * dt**2, + theta_dot + - 3 * G / (2 * length) * np.sin(theta + np.pi) * dt + + 3 / (m * length**2) * original_action * dt, + ], + ) + + return np.squeeze(f) + + def update_gp_dynamics(self, obs: np.ndarray, act: np.ndarray) -> None: + """Update the Gaussian Process (GP) dynamics model based on observed states and actions. + + Args: + obs (np.ndarray): Agent's observation of the current environment. + act (np.ndarray): Actions taken. + """ + obs = obs.detach().cpu().squeeze().numpy() + act = act.detach().cpu().squeeze().numpy() + N = self.observation_size + X = obs + U = act + L = len(X) + err = np.zeros((L - 1, N - 1)) + S = np.zeros((L - 1, 2)) + for i in range(L - 1): + f = self.get_dynamics(X[i], U[i]) + theta_p = np.arctan2(X[i][1], X[i][0]) + theta_dot_p = X[i][2] + theta = np.arctan2(X[i + 1][1], X[i + 1][0]) + theta_dot = X[i + 1][2] + S[i, :] = np.array([theta_p, theta_dot_p]) + err[i, :] = np.array([theta, theta_dot]) - f + self.gp_model[0].fit(S, err[:, 0]) + self.gp_model[1].fit(S, err[:, 1]) + + def get_gp_dynamics(self, obs: torch.Tensor, use_prev_model: bool) -> list[np.ndarray]: + """Retrieve the GP dynamics based on the current observation. + + Args: + obs (torch.Tensor): Agent's observation of the current environment. + use_prev_model (bool): Whether to use previous gaussian model. 
+ + Returns: + list[np.ndarray]: list containing the gp dynamics [f, g, x, std]. + """ + obs = obs.cpu().detach().numpy() + u_rl = 0 + dt = 0.05 + G = 10 + m = 1 + length = 1 + obs = np.squeeze(obs) + theta = np.arctan2(obs[1], obs[0]) + theta_dot = obs[2] + x = np.array([theta, theta_dot]) + f_nom = np.array( + [ + -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2 + + theta_dot * dt + + theta + + 3 / (m * length**2) * u_rl * dt**2, + theta_dot + - 3 * G / (2 * length) * np.sin(theta + np.pi) * dt + + 3 / (m * length**2) * u_rl * dt, + ], + ) + g = np.array([3 / (m * length**2) * dt**2, 3 / (m * length**2) * dt]) + f_nom = np.squeeze(f_nom) + f = np.zeros(2) + if use_prev_model: + [m1, std1] = self.gp_model_prev[0].predict(x.reshape(1, -1), return_std=True) + [m2, std2] = self.gp_model_prev[1].predict(x.reshape(1, -1), return_std=True) + else: + [m1, std1] = self.gp_model[0].predict(x.reshape(1, -1), return_std=True) + [m2, std2] = self.gp_model[1].predict(x.reshape(1, -1), return_std=True) + f[0] = f_nom[0] + m1 + f[1] = f_nom[1] + m2 + return [ + np.squeeze(f), + np.squeeze(g), + np.squeeze(x), + np.array([np.squeeze(std1), np.squeeze(std2)]), + ] + + def reset_gp_model(self) -> None: + """Reset the gaussian process model of barrier function solver.""" + self.gp_model_prev = self.gp_model.copy() + self._build_gp_model() diff --git a/omnisafe/common/robust_gp_model.py b/omnisafe/common/robust_gp_model.py index 5a305140d..9361c833b 100644 --- a/omnisafe/common/robust_gp_model.py +++ b/omnisafe/common/robust_gp_model.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -393,20 +393,20 @@ def predict_disturbance(self, test_x: torch.Tensor) -> tuple[torch.Tensor, torch return (to_tensor(means, dtype, device), to_tensor(f_std, dtype, device)) - def load_disturbance_models(self, save_dir: str, epoch: str) -> None: + def load_disturbance_models(self, load_dir: str, epoch: str) -> None: """Load the disturbance models and their training data. Args: - save_dir (str): The directory where the model files are saved. + load_dir (str): The directory where the model files are saved. epoch (str): The epoch identifier used in the filenames to load the specific model checkpoint. """ self._disturb_estimators = [] weights = torch.load( - os.path.join(save_dir, f'gp_models_{epoch}.pkl'), + os.path.join(load_dir, f'gp_models_{epoch}.pkl'), map_location=self.device, ) - self._train_x = torch.load(os.path.join(save_dir, f'gp_models_train_x_{epoch}.pkl')) - self._train_y = torch.load(os.path.join(save_dir, f'gp_models_train_y_{epoch}.pkl')) + self._train_x = torch.load(os.path.join(load_dir, f'gp_models_train_x_{epoch}.pkl')) + self._train_y = torch.load(os.path.join(load_dir, f'gp_models_train_y_{epoch}.pkl')) for i in range(self.n_s): self._disturb_estimators.append( GPyDisturbanceEstimator( diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py index 691d6aa86..088c8b4af 100644 --- a/omnisafe/evaluator.py +++ b/omnisafe/evaluator.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -51,8 +51,9 @@ from omnisafe.common.control_barrier_function.crabs.optimizers import Barrier from omnisafe.common.control_barrier_function.crabs.utils import Normalizer as CRABSNormalizer from omnisafe.common.control_barrier_function.crabs.utils import create_model_and_trainer +from omnisafe.common.gp_model import DynamicsModel from omnisafe.common.robust_barrier_solver import CBFQPLayer -from omnisafe.common.robust_gp_model import DynamicsModel +from omnisafe.common.robust_gp_model import DynamicsModel as RobustDynamicsModel from omnisafe.envs.core import CMDP, make from omnisafe.envs.wrapper import ActionRepeat, ActionScale, ObsNormalize, TimeLimit from omnisafe.models.actor import ActorBuilder @@ -100,7 +101,7 @@ def __init__( self._safety_obs = torch.ones(1) self._cost_count = torch.zeros(1) self.__set_render_mode(render_mode) - self._dynamics_model: DynamicsModel | None = None + self._dynamics_model: DynamicsModel | RobustDynamicsModel | None = None self._solver: PendulumSolver | CBFQPLayer | None = None self._compensator = None @@ -311,6 +312,18 @@ def __load_model_and_env( self._actor = actor_builder.build_actor(actor_type) self._actor.load_state_dict(model_params['pi']) if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': + epoch = model_name.split('.pt')[0].split('-')[-1] + self._solver = PendulumSolver(action_size=self._env.action_space.shape[0]) + path = os.path.join( + save_dir, + 'gp_model_save', + f'gaussian_process_regressor_{epoch}.pkl', + ) + self._dynamics_model = DynamicsModel( + observation_size=observation_space.shape[0], + load_dir=path, + ) + self._compensator = BarrierCompensator( obs_dim=observation_space.shape[0], act_dim=action_space.shape[0], @@ -332,9 +345,9 @@ def __load_model_and_env( gamma_b=self._cfgs['cbf_cfgs']['gamma_b'], l_p=self._cfgs['cbf_cfgs']['l_p'], ) - self._dynamics_model = DynamicsModel(env=self._env) + self._dynamics_model = RobustDynamicsModel(env=self._env)
self._dynamics_model.load_disturbance_models( - save_dir=os.path.join(self._save_dir, 'gp_model_save'), + load_dir=os.path.join(self._save_dir, 'gp_model_save'), epoch=epoch, ) @@ -413,22 +426,11 @@ def load_saved( # load the config self._save_dir = save_dir self._model_name = model_name - epoch = model_name.split('.pt')[0].split('-')[-1] self.__load_cfgs(save_dir) self.__set_render_mode(render_mode) - if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': - - self._solver = PendulumSolver() - path = os.path.join( - save_dir, - 'gp_model_save', - f'gaussian_process_regressor_{epoch}.pkl', - ) - self._solver.build_gp_model(save_dir=path) - env_kwargs = { 'env_id': self._cfgs['env_id'], 'num_envs': 1, @@ -443,7 +445,7 @@ def load_saved( self.__load_model_and_env(save_dir, model_name, env_kwargs) - # pylint: disable-next=too-many-locals + # pylint: disable-next=too-many-locals,too-many-branches def evaluate( self, num_episodes: int = 10, @@ -503,13 +505,13 @@ def evaluate( if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': approx_compensating_act = self._compensator(obs=obs) compensated_act_mean_raw = act + approx_compensating_act - [f, g, x, std] = self._solver.get_gp_dynamics(obs, use_prev_model=False) + [f, g, x, std] = self._dynamics_model.get_gp_dynamics(obs, use_prev_model=False) compensating_act = self._solver.control_barrier( - compensated_act_mean_raw, - f, - g, - x, - std, + original_action=compensated_act_mean_raw, + f=f, + g=g, + x=x, + std=std, ) act = compensated_act_mean_raw + compensating_act @@ -532,7 +534,12 @@ def evaluate( self._safety_obs /= self._cfgs.algo_cfgs.saute_gamma ep_ret += rew.item() - ep_cost += (cost_criteria**length) * cost.item() + + if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': + ep_cost = ep_cost if ep_cost > cost.item() else cost.item() + else: + ep_cost += (cost_criteria**length) * cost.item() + if ( 'EarlyTerminated' in self._cfgs['algo'] and ep_cost >= 
self._cfgs.algo_cfgs.cost_limit @@ -647,13 +654,16 @@ def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branc if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': approx_compensating_act = self._compensator(obs=obs) compensated_act_mean_raw = act + approx_compensating_act - [f, g, x, std] = self._solver.get_gp_dynamics(obs, use_prev_model=False) + [f, g, x, std] = self._dynamics_model.get_gp_dynamics( + obs, + use_prev_model=False, + ) compensating_act = self._solver.control_barrier( - compensated_act_mean_raw, - f, - g, - x, - std, + original_action=compensated_act_mean_raw, + f=f, + g=g, + x=x, + std=std, ) act = compensated_act_mean_raw + compensating_act @@ -688,7 +698,10 @@ def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branc step += 1 done = bool(terminated or truncated) ep_ret += rew.item() - ep_cost += (cost_criteria**length) * cost.item() + if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': + ep_cost = ep_cost if ep_cost > cost.item() else cost.item() + else: + ep_cost += (cost_criteria**length) * cost.item() if ( 'EarlyTerminated' in self._cfgs['algo'] and ep_cost >= self._cfgs.algo_cfgs.cost_limit diff --git a/pyproject.toml b/pyproject.toml index 350414746..5b4a33e95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -130,7 +130,6 @@ ignore-words = "docs/source/spelling_wordlist.txt" # Sync with requires-python target-version = "py38" line-length = 100 -show-source = true src = ["omnisafe", "tests", "examples"] select = [ "E", "W", # pycodestyle From 436dbddc0978d97920769ad3116381f1f1a1ed8d Mon Sep 17 00:00:00 2001 From: Gaiejj Date: Thu, 4 Jul 2024 19:39:39 +0800 Subject: [PATCH 16/18] docs(cbf): update CBF methods docs --- docs/source/index.rst | 1 + docs/source/saferl/cbf.rst | 136 +++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 docs/source/saferl/cbf.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 
792f62052..402ed6203 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -394,6 +394,7 @@ this project, don't hesitate to ask your question on `the GitHub issue page `_ as an example. + +The CBF method implementation in OmniSafe revolves around the ``Adapter``, which decouples and integrates the two core components: ``dynamics model`` and ``solver``. The former predicts the dynamic changes of the environment, while the latter maps the current action to a safe space based on the given environment dynamics. + +CBF Adapter +----------- + +.. currentmodule:: omnisafe.adapter + +.. card:: + :class-header: sd-bg-success sd-text-white + :class-card: sd-outline-success sd-rounded-1 + + Documentation + ^^^ + + .. autoclass:: OffPolicyBarrierFunctionAdapter + :members: + +Core Components +--------------- + +Dynamics Model +"""""""""""""" + +The environmental dynamic model of the CBF method needs to be designed for a specific environment. For example, in the case of the ``Pendulum-v1`` environment, the environmental dynamics will be calculated together with variables such as mass and gravitational acceleration. + +.. code-block:: python + :linenos: + + def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: + dt = 0.05 + # gravitational constant + G = 10 + # mass + m = 2 + # length + length = 2 + # calculate the angle + theta = np.arctan2(obs[1], obs[0]) + # angular velocity + theta_dot = obs[2] + # dynamics equations + f = np.array( + [ + -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2 + + theta_dot * dt + + theta + + 3 / (m * length**2) * original_action * dt**2, + theta_dot + - 3 * G / (2 * length) * np.sin(theta + np.pi) * dt + + 3 / (m * length**2) * original_action * dt, + ], + ) + return np.squeeze(f) + +The current mainstream implementation often uses a combination of several Gaussian Process (GP) models to fit the environmental dynamics. The specific code documentation is as follows: + +.. 
currentmodule:: omnisafe.common + +.. card:: + :class-header: sd-bg-success sd-text-white + :class-card: sd-outline-success sd-rounded-1 + + Documentation + ^^^ + + .. autoclass:: DynamicsModel + :members: + :private-members: + +The ``solver`` is responsible for taking the feedback information from the ``dynamics model`` and mapping the often unsafe actions generated by the agent into a safe one. + +CBF Solver +"""""""""" + +.. currentmodule:: omnisafe.common + +.. card:: + :class-header: sd-bg-success sd-text-white + :class-card: sd-outline-success sd-rounded-1 + + Documentation + ^^^ + + .. autoclass:: PendulumSolver + :members: + :private-members: + +Architecture of methods +""""""""""""""""""""""" + +- ``DDPGCBF.learn()`` + + - ``DDPGCBF._env.rollout()`` + + - ``DDPGCBF._env.get_safe_action()`` + + - ``DDPGCBF._env.dynamics_model.get_gp_dynamics()`` + - ``DDPGCBF._env.solver.control_barrier()`` + + - ``DDPGCBF._env.dynamics_model.update_gp_dynamics()`` + + - ``DDPGCBF._update()`` + + +Further Discussion +"""""""""""""""""" + +For details on the implementation, performance, reproducible scripts, and related discussions of algorithms including DDPGCBF, please refer to: https://github.com/PKU-Alignment/omnisafe/pull/323 + + +References +---------- + +- `End-to-End Safe Reinforcement Learning through Barrier Functions for Safety-Critical Continuous Control Tasks `__ +- `Safe Reinforcement Learning Using Robust Control Barrier Functions `__ +- `Learning Barrier Certificates: Towards Safe Reinforcement Learning with Zero Training-time Violations `__ From f56875b48782a32fe92729b93707bc3b6284ea0a Mon Sep 17 00:00:00 2001 From: Gaiejj Date: Thu, 4 Jul 2024 19:58:30 +0800 Subject: [PATCH 17/18] style: polish code style --- conftest.py | 4 +--- omnisafe/version.py | 6 +++--- pyproject.toml | 14 +++++++------- tests/test_buffer.py | 2 +- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/conftest.py b/conftest.py index f3a1e8b06..266ac7a7e 100644 --- 
a/conftest.py +++ b/conftest.py @@ -10,6 +10,4 @@ def pytest_ignore_collect(path, config): - if os.path.basename(path) == 'meta_drive_env.py' and not meta_drive_env_available: - return True - return False + return os.path.basename(path) == 'meta_drive_env.py' and not meta_drive_env_available diff --git a/omnisafe/version.py b/omnisafe/version.py index 0295dccbf..bb545ba26 100644 --- a/omnisafe/version.py +++ b/omnisafe/version.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,8 +25,8 @@ try: prefix, sep, suffix = ( - subprocess.check_output( - ['git', 'describe', '--abbrev=7'], # noqa: S603,S607 + subprocess.check_output( # noqa: S603 + ['git', 'describe', '--abbrev=7'], # noqa: S607 cwd=os.path.dirname(os.path.abspath(__file__)), stderr=subprocess.DEVNULL, text=True, diff --git a/pyproject.toml b/pyproject.toml index 5b4a33e95..d7351aeb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -131,7 +131,7 @@ ignore-words = "docs/source/spelling_wordlist.txt" target-version = "py38" line-length = 100 src = ["omnisafe", "tests", "examples"] -select = [ +lint.select = [ "E", "W", # pycodestyle "F", # pyflakes "UP", # pyupgrade @@ -152,7 +152,7 @@ select = [ "TID", # flake8-tidy-imports "RUF", # ruff ] -ignore = [ +lint.ignore = [ # E501: line too long # W505: doc line too long # too long docstring due to long example blocks @@ -171,9 +171,9 @@ ignore = [ # use alias for import convention (e.g., `import torch.nn as nn`) "PLR0402", ] -typing-modules = ["omnisafe.typing"] +lint.typing-modules = ["omnisafe.typing"] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = [ "F401", # unused-import ] @@ -235,15 +235,15 @@ typing-modules = ["omnisafe.typing"] "ANN003", # Missing type annotation ] -[tool.ruff.flake8-annotations] 
+[tool.ruff.lint.flake8-annotations] allow-star-arg-any = true -[tool.ruff.flake8-quotes] +[tool.ruff.lint.flake8-quotes] docstring-quotes = "double" multiline-quotes = "double" inline-quotes = "single" -[tool.ruff.flake8-tidy-imports] +[tool.ruff.lint.flake8-tidy-imports] ban-relative-imports = "all" [tool.pytest.ini_options] diff --git a/tests/test_buffer.py b/tests/test_buffer.py index 0fee90a46..b284b9e10 100644 --- a/tests/test_buffer.py +++ b/tests/test_buffer.py @@ -79,7 +79,7 @@ def test_vector_onpolicy_buffer( assert ( vector_buffer.standardized_adv_r == standardized_adv_r ), f'vector_buffer.sstandardized_adv_r is {vector_buffer.sstandardized_adv_r}' - assert vector_buffer.buffers is not [], f'vector_buffer.buffers is {vector_buffer.buffers}' + assert vector_buffer.buffers != [], f'vector_buffer.buffers is {vector_buffer.buffers}' # checking the store function obs_dim = obs_space.shape[0] From 34104cdf3b5c9797199d732acb9344ce6b2a563b Mon Sep 17 00:00:00 2001 From: Gaiejj Date: Thu, 4 Jul 2024 20:12:35 +0800 Subject: [PATCH 18/18] style: polish docs style --- docs/source/spelling_wordlist.txt | 1 + omnisafe/common/gp_model.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/spelling_wordlist.txt b/docs/source/spelling_wordlist.txt index 46e297388..958277550 100644 --- a/docs/source/spelling_wordlist.txt +++ b/docs/source/spelling_wordlist.txt @@ -513,3 +513,4 @@ Vipul Sivaranjani Vijay suttle +regressor diff --git a/omnisafe/common/gp_model.py b/omnisafe/common/gp_model.py index 771b29731..dac93ea13 100644 --- a/omnisafe/common/gp_model.py +++ b/omnisafe/common/gp_model.py @@ -97,7 +97,7 @@ def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: """Calculate the dynamics of the system based on the current observation and the original action. This method computes the next state of a pendulum system using the provided state and - action. 
The equations of motion for the pendulum are discretized using the Euler method. + action. Args: obs (list[float]): The current observation of the system state.