diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 09cb7b2f33a8..c2a44f16155e 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3563,10 +3563,53 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) zfs_cmd_t zc = {"\0"}; char errbuf[ERRBUFLEN]; libzfs_handle_t *hdl = zhp->zpool_hdl; + nvlist_t *vdev_nv; + boolean_t avail_spare, l2cache; + char *vdev_name; + char guid_str[21]; /* 64-bit num + '\0' */ + boolean_t is_draid_spare = B_FALSE; + const char *vdev_type; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid); + snprintf(guid_str, sizeof (guid_str), "%llu", (u_longlong_t)guid); + if ((vdev_nv = zpool_find_vdev(zhp, guid_str, &avail_spare, + &l2cache, NULL)) == NULL) + return (zfs_error(hdl, EZFS_NODEVICE, errbuf)); + + vdev_name = zpool_vdev_name(hdl, zhp, vdev_nv, 0); + if (vdev_name != NULL) { + /* + * We have the actual vdev name, so use that instead of the GUID + * in any error messages. + */ + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot fault %s"), vdev_name); + free(vdev_name); + } + + /* + * Spares (traditional or draid) cannot be faulted by libzfs, except: + * + * - Any spare type that exceeds its errors can be faulted (aux = + * VDEV_AUX_ERR_EXCEEDED). This is only used by zed. + * + * - Traditional spares that are active can be force-faulted. + */ + if (nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_TYPE, &vdev_type) == 0) + if (strcmp(vdev_type, VDEV_TYPE_DRAID_SPARE) == 0) + is_draid_spare = B_TRUE; + + /* + * If vdev is a spare that is not being used, or is a dRAID spare (in + * use or not), then don't allow it to be force-faulted. However, an + * in-use dRAID spare can be faulted by ZED if it sees too many errors + * (aux = VDEV_AUX_ERR_EXCEEDED). 
+ */ + if (avail_spare || (is_draid_spare && aux != VDEV_AUX_ERR_EXCEEDED)) + return (zfs_error(hdl, EZFS_ISSPARE, errbuf)); + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; zc.zc_cookie = VDEV_STATE_FAULTED; diff --git a/man/man8/zpool-offline.8 b/man/man8/zpool-offline.8 index 388c7634acce..155e185236d0 100644 --- a/man/man8/zpool-offline.8 +++ b/man/man8/zpool-offline.8 @@ -56,11 +56,12 @@ .Ar pool .Ar device Ns … .Xc -Takes the specified physical device offline. +Takes the specified physical device offline or force-faults it. While the .Ar device -is offline, no attempt is made to read or write to the device. -This command is not applicable to spares. +is offline or force-faulted, no attempt is made to read or write to the device. +dRAID spares cannot be offlined or force-faulted. +Traditional spares can only be offlined or force-faulted when they are active. .Bl -tag -width Ds .It Fl -power Power off the device's slot in the storage enclosure. diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 0266f22e4806..3a302eb4cb84 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -524,7 +524,7 @@ tags = ['functional', 'cli_root', 'zpool_initialize'] [tests/functional/cli_root/zpool_offline] tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg', - 'zpool_offline_003_pos'] + 'zpool_offline_003_pos', 'zpool_offline_spare'] tags = ['functional', 'cli_root', 'zpool_offline'] [tests/functional/cli_root/zpool_online] diff --git a/tests/runfiles/sanity.run b/tests/runfiles/sanity.run index dad51d2e99be..307f554ca135 100644 --- a/tests/runfiles/sanity.run +++ b/tests/runfiles/sanity.run @@ -323,7 +323,8 @@ pre = tags = ['functional', 'cli_root', 'zpool_initialize'] [tests/functional/cli_root/zpool_offline] -tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg'] +tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg', + 'zpool_offline_spare'] tags = ['functional', 'cli_root', 
'zpool_offline'] [tests/functional/cli_root/zpool_online] diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 23c2d9e4b95d..266e18318779 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1216,6 +1216,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_offline/zpool_offline_001_pos.ksh \ functional/cli_root/zpool_offline/zpool_offline_002_neg.ksh \ functional/cli_root/zpool_offline/zpool_offline_003_pos.ksh \ + functional/cli_root/zpool_offline/zpool_offline_spare.ksh \ functional/cli_root/zpool_online/cleanup.ksh \ functional/cli_root/zpool_online/setup.ksh \ functional/cli_root/zpool_online/zpool_online_001_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_spare.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_spare.ksh new file mode 100755 index 000000000000..cd7776f00aba --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_spare.ksh @@ -0,0 +1,84 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# Copyright 2026 by Lawrence Livermore National Security, LLC. + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify that traditional spares that are active can be offlined or +# force-faulted. Verify that in all other cases, spares cannot be +# offlined or faulted. +# +# STRATEGY: +# 1. Create pool with traditional spare +# 2. Verify we can't offline and fault an inactive traditional spare +# 3. Verify we can offline and fault an active traditional spare +# 4. Create draid pool with draid spare +# 5. Verify we can't offline/fault draid spare + +TESTPOOL2=testpool2 +function cleanup +{ + destroy_pool $TESTPOOL2 + log_must rm -f $TESTDIR/file-vdev-{1..3} +} + +log_onexit cleanup +verify_runnable "global" + +log_assert "Verify zpool offline has the correct behavior on spares" + +# Verify any old file vdevs are gone +log_mustnot ls $TESTDIR/file-vdev-* &> /dev/null + +log_must truncate -s 100M $TESTDIR/file-vdev-{1..3} + +log_must zpool create $TESTPOOL2 mirror $TESTDIR/file-vdev-1 \ + $TESTDIR/file-vdev-2 spare $TESTDIR/file-vdev-3 + +# Test that we can't offline an inactive spare +log_mustnot zpool offline $TESTPOOL2 $TESTDIR/file-vdev-3 +log_mustnot zpool offline -f $TESTPOOL2 $TESTDIR/file-vdev-3 + +# Test that we can offline an active spare +log_must zpool replace $TESTPOOL2 $TESTDIR/file-vdev-1 $TESTDIR/file-vdev-3 +log_must zpool offline $TESTPOOL2 $TESTDIR/file-vdev-3 +log_must zpool online $TESTPOOL2 $TESTDIR/file-vdev-3 +log_must zpool offline -f $TESTPOOL2 $TESTDIR/file-vdev-3 + +destroy_pool $TESTPOOL2 + +log_must zpool create -f $TESTPOOL2 draid1:1d:1s:3c $TESTDIR/file-vdev-{1..3} + +# Test that we can't offline an inactive draid spare +log_mustnot zpool offline $TESTPOOL2 draid1-0-0 +log_mustnot zpool 
offline -f $TESTPOOL2 draid1-0-0 + +# Test that we can't offline an active draid spare +log_must zpool replace $TESTPOOL2 $TESTDIR/file-vdev-1 draid1-0-0 +log_mustnot zpool offline $TESTPOOL2 draid1-0-0 +log_mustnot zpool offline -f $TESTPOOL2 draid1-0-0 + +log_pass "zpool offline has the correct behavior on spares" diff --git a/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh index 43bfe2f5bfb1..83b3b77aa193 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh @@ -166,9 +166,8 @@ do mntpnt=$(get_prop mountpoint /$TESTPOOL) - # 2. Fault the spare device making it unavailable - log_must zpool offline -f $TESTPOOL $sparedev - log_must wait_hotspare_state $TESTPOOL $sparedev "FAULTED" + # 2. Remove the spare device making it unavailable + log_must zpool remove $TESTPOOL $sparedev # 3. Simulate physical removal of one device remove_disk $removedev