Skip to content
71 changes: 71 additions & 0 deletions deepmd/pt/loss/ener.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def __init__(
inference: bool = False,
use_huber: bool = False,
huber_delta: float = 0.01,
start_pref_ap: float = 0.0,
limit_pref_ap: float = 0.0,
numb_aparam: int = 0,
**kwargs: Any,
) -> None:
r"""Construct a layer to compute loss on energy, force and virial.
Expand Down Expand Up @@ -109,6 +112,12 @@ def __init__(
Formula: loss = 0.5 * (error**2) if |error| <= D else D * (|error| - 0.5 * D).
huber_delta : float
The threshold delta (D) used for Huber loss, controlling transition between L2 and L1 loss.
start_pref_ap : float
The prefactor of aparam gradient loss at the start of the training.
limit_pref_ap : float
The prefactor of aparam gradient loss at the end of the training.
numb_aparam : int
The dimension of atomic parameters. Required when aparam gradient loss is enabled.
**kwargs
Other keyword arguments.
"""
Expand Down Expand Up @@ -151,6 +160,15 @@ def __init__(
"Huber loss is not implemented for force with atom_pref, generalized force and relative force. "
)

# Enable the aparam-gradient loss term only when at least one of its
# prefactors is non-zero (same convention as the other loss terms).
self.has_ap = start_pref_ap != 0.0 or limit_pref_ap != 0.0
if self.has_ap and numb_aparam == 0:
    # The label/prediction tensors for this term have ndof == numb_aparam
    # per atom, so a zero dimension would make the loss meaningless.
    raise RuntimeError(
        "numb_aparam must be > 0 when aparam gradient loss is enabled"
    )
self.start_pref_ap = start_pref_ap
self.limit_pref_ap = limit_pref_ap
self.numb_aparam = numb_aparam

def forward(
self,
input_dict: dict[str, torch.Tensor],
Expand Down Expand Up @@ -182,6 +200,16 @@ def forward(
more_loss: dict[str, torch.Tensor]
Other losses for display.
"""
# Re-bind "aparam" to a leaf tensor with requires_grad=True so that
# d(energy)/d(aparam) can be taken later via torch.autograd.grad.
# Skipped when grad is globally disabled (e.g. inference/validation),
# where autograd.grad would fail.
ap_for_grad: torch.Tensor | None = None
if (
    self.has_ap
    and input_dict.get("aparam") is not None
    and torch.is_grad_enabled()
):
    # detach() makes it a leaf; requires_grad_ re-enables autograd on it.
    ap_for_grad = input_dict["aparam"].detach()
    ap_for_grad.requires_grad_(True)
    # Shallow-copy the dict so the caller's input_dict is not mutated.
    input_dict = {**input_dict, "aparam": ap_for_grad}

model_pred = model(**input_dict)
coef = learning_rate / self.starter_learning_rate
pref_e = self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * coef
Expand Down Expand Up @@ -402,6 +430,37 @@ def forward(
rmse_ae.detach(), find_atom_ener
)

# aparam-gradient loss: penalize the mismatch between the autograd
# derivative d(sum E)/d(aparam) and the labelled "grad_aparam" data.
if (
    self.has_ap
    and ap_for_grad is not None
    and "energy" in model_pred
    and "grad_aparam" in label
):
    # find_* flags mark whether the label is actually present in the data;
    # a missing label zeroes the prefactor below.
    find_grad_ap = label.get("find_grad_aparam", 0.0)
    # Linearly interpolated prefactor following the same LR-based
    # schedule (coef) as the other loss terms.
    pref_ap = (
        self.limit_pref_ap + (self.start_pref_ap - self.limit_pref_ap) * coef
    ) * find_grad_ap
    energy_pred = model_pred["energy"]  # [nf, 1]
    # Compute d(sum_E)/d(aparam_raw); shape [nf, nloc, numb_aparam]
    grad_ap_pred = torch.autograd.grad(
        [energy_pred.sum()],
        [ap_for_grad],
        create_graph=True,  # let second-order gradients flow back to the model parameters
        retain_graph=True,  # keep the graph so the energy/force losses can still backprop
    )[0]
    assert grad_ap_pred is not None
    grad_ap_label = label["grad_aparam"].to(grad_ap_pred.dtype)
    diff_ap = (grad_ap_label - grad_ap_pred).reshape(-1)
    l2_ap_loss = torch.mean(torch.square(diff_ap))
    if not self.inference:
        more_loss["l2_grad_aparam_loss"] = self.display_if_exist(
            l2_ap_loss.detach(), find_grad_ap
        )
    loss += (pref_ap * l2_ap_loss).to(GLOBAL_PT_FLOAT_PRECISION)
    more_loss["rmse_grad_aparam"] = self.display_if_exist(
        l2_ap_loss.sqrt().detach(), find_grad_ap
    )

if not self.inference:
more_loss["rmse"] = torch.sqrt(loss.detach())
return model_pred, loss, more_loss
Expand Down Expand Up @@ -482,6 +541,16 @@ def label_requirement(self) -> list[DataRequirementItem]:
default=1.0,
)
)
# Request the per-atom "grad_aparam" label (ndof = numb_aparam per atom)
# when the aparam-gradient loss is active. must=False keeps systems
# without this label usable; find_grad_aparam gates the term at runtime.
if self.has_ap:
    label_requirement.append(
        DataRequirementItem(
            "grad_aparam",
            ndof=self.numb_aparam,
            atomic=True,
            must=False,
            high_prec=False,
        )
    )
return label_requirement

def serialize(self) -> dict:
Expand Down Expand Up @@ -510,6 +579,8 @@ def serialize(self) -> dict:
"enable_atom_ener_coeff": self.enable_atom_ener_coeff,
"start_pref_gf": self.start_pref_gf,
"limit_pref_gf": self.limit_pref_gf,
"start_pref_ap": self.start_pref_ap,
"limit_pref_ap": self.limit_pref_ap,
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The numb_aparam field is stored as an instance attribute (self.numb_aparam) and is required to reconstruct the loss object when has_ap is true, but it is not included in the serialize() return dict. When deserialize() calls cls(**data), the numb_aparam argument will be absent, causing the reconstructed object to default to numb_aparam=0. If start_pref_ap or limit_pref_ap is non-zero in the serialized data, the __init__ will then raise RuntimeError("numb_aparam must be > 0 when aparam gradient loss is enabled"), making deserialization impossible for models that use this feature.

Additionally, since a new field is added to the serialized representation, the @version should be bumped (e.g. to 3) and check_version_compatibility in deserialize() updated to check_version_compatibility(data.pop("@version"), 3, 1) to reflect the schema change, as is the established convention in the codebase.

Copilot uses AI. Check for mistakes.
"numb_generalized_coord": self.numb_generalized_coord,
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

numb_aparam is not serialized in the serialize() method. Without it, deserialize() will pass numb_aparam=0 to __init__, causing a RuntimeError when has_ap is True. Add "numb_aparam": self.numb_aparam to the serialized dict.

Suggested change
"numb_generalized_coord": self.numb_generalized_coord,
"numb_generalized_coord": self.numb_generalized_coord,
"numb_aparam": self.numb_aparam,

Copilot uses AI. Check for mistakes.
"use_huber": self.use_huber,
"huber_delta": self.huber_delta,
Expand Down
5 changes: 5 additions & 0 deletions deepmd/pt/train/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -1690,6 +1690,11 @@ def get_loss(
return EnergyHessianStdLoss(**loss_params)
elif loss_type == "ener":
loss_params["starter_learning_rate"] = start_lr
# The aparam-gradient loss needs the model's aparam dimension; inject
# it only when the user enabled the term via a non-zero prefactor.
# NOTE(review): this injection lives only in the loss_type == "ener"
# branch; if the hessian loss path also accepts these prefactors, it
# would receive numb_aparam=0 and raise in __init__ — confirm.
if (
    loss_params.get("start_pref_ap", 0.0) != 0.0
    or loss_params.get("limit_pref_ap", 0.0) != 0.0
):
    loss_params["numb_aparam"] = _model.get_dim_aparam()
Comment on lines +1688 to +1702
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When whether_hessian() returns True, the code falls into the first branch (line 1688-1690) and creates EnergyHessianStdLoss. The numb_aparam injection (lines 1693-1697) is in the elif loss_type == "ener" branch, which is skipped. As a result, if a user configures both start_pref_h > 0 and start_pref_ap != 0.0, the EnergyHessianStdLoss constructor will receive numb_aparam=0 (the default), causing the RuntimeError "numb_aparam must be > 0 when aparam gradient loss is enabled". The numb_aparam injection logic should be moved to cover both branches, or factored out into a shared helper.

Copilot uses AI. Check for mistakes.
return EnergyStdLoss(**loss_params)
elif loss_type == "dos":
loss_params["starter_learning_rate"] = start_lr
Expand Down
16 changes: 16 additions & 0 deletions deepmd/utils/argcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -3072,6 +3072,8 @@ def loss_ener() -> list[Argument]:
doc_limit_pref_pf = limit_pref("atomic prefactor force")
doc_start_pref_gf = start_pref("generalized force", label="drdq", abbr="gf")
doc_limit_pref_gf = limit_pref("generalized force")
doc_start_pref_ap = start_pref("aparam gradient", label="grad_aparam", abbr="ap")
doc_limit_pref_ap = limit_pref("aparam gradient")
doc_numb_generalized_coord = "The dimension of generalized coordinates. Required when generalized force loss is used."
doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label."
doc_enable_atom_ener_coeff = "If true, the energy will be computed as \\sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1."
Expand Down Expand Up @@ -3211,6 +3213,20 @@ def loss_ener() -> list[Argument]:
default=0.01,
doc=doc_huber_delta,
),
Argument(
"start_pref_ap",
[float, int],
optional=True,
default=0.0,
doc=doc_start_pref_ap,
),
Argument(
"limit_pref_ap",
[float, int],
optional=True,
default=0.0,
doc=doc_limit_pref_ap,
),
]


Expand Down
Loading