diff --git a/Makefile b/Makefile
index 44f6807f3..72db00c18 100644
--- a/Makefile
+++ b/Makefile
@@ -84,6 +84,7 @@ install: completion
 	fi
 	install -D -m 0644 -t $(DESTDIR)/usr/lib/systemd/system crates/initramfs/*.service
 	install -D -m 0755 target/release/bootc-initramfs-setup $(DESTDIR)/usr/lib/bootc/initramfs-setup
+	install -D -m 0755 -t $(DESTDIR)/usr/lib/bootc crates/initramfs/luks-firstboot/bootc-luks-firstboot.sh
 	install -D -m 0755 -t $(DESTDIR)/usr/lib/dracut/modules.d/51bootc crates/initramfs/dracut/module-setup.sh
 
 # Run this to also take over the functionality of `ostree container` for example.
diff --git a/crates/initramfs/bootc-luks-firstboot.service b/crates/initramfs/bootc-luks-firstboot.service
new file mode 100644
index 000000000..7af585979
--- /dev/null
+++ b/crates/initramfs/bootc-luks-firstboot.service
@@ -0,0 +1,34 @@
+[Unit]
+Description=bootc first-boot LUKS encryption
+Documentation=man:bootc(1)
+DefaultDependencies=no
+ConditionKernelCommandLine=rd.bootc.luks.encrypt
+ConditionPathExists=/etc/initrd-release
+
+# Run before the root filesystem is mounted. We need the root block device
+# to be available but not yet mounted, so we can encrypt it in-place.
+# After encryption, cryptsetup reencrypt auto-opens the device as
+# /dev/mapper/cr_root, and udev creates the by-uuid symlink so that the
+# root=UUID= karg resolves to the encrypted device.
+Before=sysroot.mount
+Before=initrd-root-fs.target
+
+# We need block devices to be available and udev to have settled
+After=systemd-udev-settle.service
+After=dracut-initqueue.service
+Wants=systemd-udev-settle.service
+
+# If we fail, drop to emergency shell -- do not leave the system
+# with a half-encrypted root partition
+OnFailure=emergency.target
+OnFailureJobMode=isolate
+
+[Service]
+Type=oneshot
+ExecStart=/usr/lib/bootc/bootc-luks-firstboot.sh
+StandardInput=null
+StandardOutput=journal+console
+StandardError=journal+console
+RemainAfterExit=yes
+# Encryption of a large root partition can take several minutes
+TimeoutStartSec=900
diff --git a/crates/initramfs/dracut/module-setup.sh b/crates/initramfs/dracut/module-setup.sh
index 2e5187dfd..b333dd4bf 100755
--- a/crates/initramfs/dracut/module-setup.sh
+++ b/crates/initramfs/dracut/module-setup.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 installkernel() {
-    instmods erofs overlay
+    instmods erofs overlay dm_crypt
 }
 check() {
     # We are never installed by default; see 10-bootc-base.conf
@@ -17,4 +17,18 @@ install() {
     mkdir -p "${initdir}${systemdsystemconfdir}/initrd-root-fs.target.wants"
     ln_r "${systemdsystemunitdir}/${service}" \
         "${systemdsystemconfdir}/initrd-root-fs.target.wants/${service}"
+
+    # First-boot LUKS encryption support
+    local luks_service=bootc-luks-firstboot.service
+    if [ -x /usr/lib/bootc/bootc-luks-firstboot.sh ]; then
+        dracut_install /usr/lib/bootc/bootc-luks-firstboot.sh
+        # External commands bootc-luks-firstboot.sh shells out to. Keep this
+        # list in sync with the script.
+        dracut_install cryptsetup systemd-cryptenroll blkid sed grep \
+            find mkdir mount umount
+        inst_simple "${systemdsystemunitdir}/${luks_service}"
+        mkdir -p "${initdir}${systemdsystemconfdir}/sysroot.mount.requires"
+        ln_r "${systemdsystemunitdir}/${luks_service}" \
+            "${systemdsystemconfdir}/sysroot.mount.requires/${luks_service}"
+    fi
 }
diff --git a/crates/initramfs/luks-firstboot/bootc-luks-firstboot.sh b/crates/initramfs/luks-firstboot/bootc-luks-firstboot.sh
new file mode 100644
index 000000000..af8ce77c7
--- /dev/null
+++ b/crates/initramfs/luks-firstboot/bootc-luks-firstboot.sh
@@ -0,0 +1,210 @@
+#!/bin/bash
+# bootc-luks-firstboot -- encrypt root partition on first boot
+#
+# This script runs in the initrd before sysroot.mount. It checks for the
+# rd.bootc.luks.encrypt kernel argument and, if present, encrypts the root
+# partition in-place using cryptsetup reencrypt --encrypt.
+#
+# The root partition must have been created with 32MB of trailing free space
+# (filesystem smaller than partition) by bootc install to-disk.
+#
+# After encryption:
+#   - The root device is available as /dev/mapper/cr_root
+#   - A recovery key is generated and printed to the console
+#   - TPM2 is enrolled via systemd-cryptenroll
+#   - /etc/crypttab is written inside the encrypted root
+#   - BLS entries are updated with rd.luks.uuid kargs
+#   - The rd.bootc.luks.encrypt trigger karg is removed
+#
+# The root=UUID= karg does NOT need to change. Once the initrd
+# unlocks LUKS via rd.luks.uuid on subsequent boots, the ext4 UUID becomes
+# visible on /dev/mapper/cr_root and systemd resolves root= normally.
+#
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+
+set -euo pipefail
+
+ENCRYPT_KARG=""
+ROOT_DEV=""
+LUKS_NAME="cr_root"
+
+# Write a tagged diagnostic line to stderr.
+log() {
+    echo "bootc-luks-firstboot: $*" >&2
+}
+
+# Log a fatal error and exit non-zero.
+die() {
+    log "FATAL: $*"
+    exit 1
+}
+
+# Fill ENCRYPT_KARG and ROOT_DEV from the kernel command line.
+parse_cmdline() {
+    local arg
+    local -a cmdline_args
+    read -r -a cmdline_args < /proc/cmdline
+
+    for arg in "${cmdline_args[@]}"; do
+        case "$arg" in
+            rd.bootc.luks.encrypt=*)
+                ENCRYPT_KARG="${arg#rd.bootc.luks.encrypt=}"
+                ;;
+            root=UUID=*)
+                local uuid="${arg#root=UUID=}"
+                ROOT_DEV=$(blkid -U "$uuid" 2>/dev/null) || true
+                ;;
+            root=/dev/*)
+                ROOT_DEV="${arg#root=}"
+                ;;
+        esac
+    done
+}
+
+# Encrypt $ROOT_DEV in place, then enroll a recovery key and the TPM2.
+encrypt_root() {
+    log "Encrypting root device $ROOT_DEV (method: $ENCRYPT_KARG)"
+
+    # Generate a temporary passphrase for initial encryption. It is removed
+    # again by the TPM2 enrollment below.
+    local tmp_passphrase
+    tmp_passphrase=$(</proc/sys/kernel/random/uuid)
+
+    # Encrypt in-place. The filesystem was created 32MB smaller than the
+    # partition by bootc, which leaves cryptsetup the room it needs for the
+    # LUKS2 header. The device is auto-opened as /dev/mapper/$LUKS_NAME.
+    log "Running cryptsetup reencrypt --encrypt --reduce-device-size 32M ..."
+    printf '%s' "$tmp_passphrase" | cryptsetup reencrypt \
+        --encrypt \
+        --reduce-device-size 32M \
+        --batch-mode \
+        --key-file=- \
+        "$ROOT_DEV" "$LUKS_NAME"
+
+    log "Encryption complete. Device: /dev/mapper/$LUKS_NAME"
+
+    # Enroll the recovery key first, while the temporary passphrase can
+    # still unlock the volume. systemd-cryptenroll handles one enrollment
+    # per invocation, so --recovery-key must not be combined with
+    # --tpm2-device=. Only stdout (the key itself) is captured; diagnostics
+    # stay on stderr. A failure here is reported but not fatal.
+    log "Generating recovery key..."
+    local recovery_key
+    if recovery_key=$(printf '%s' "$tmp_passphrase" | systemd-cryptenroll \
+            --unlock-key-file=/dev/stdin \
+            --recovery-key \
+            "$ROOT_DEV"); then
+        # Print the recovery key prominently so the user can record it
+        echo ""
+        echo "========================================================"
+        echo " LUKS RECOVERY KEY -- RECORD THIS NOW"
+        echo " $recovery_key"
+        echo "========================================================"
+        echo ""
+    else
+        log "WARNING: Could not add recovery key"
+    fi
+
+    # Enroll TPM2 with the default PCR policy. --wipe-slot=password removes
+    # the temporary passphrase but leaves the recovery key slot in place.
+    log "Enrolling TPM2..."
+    printf '%s' "$tmp_passphrase" | systemd-cryptenroll \
+        --unlock-key-file=/dev/stdin \
+        --tpm2-device=auto \
+        --wipe-slot=password \
+        "$ROOT_DEV"
+    log "TPM2 enrolled, temporary passphrase removed"
+}
+
+# Write crypttab into the deployment and switch the BLS entries from the
+# first-boot trigger karg to the rd.luks.* unlock kargs.
+configure_system() {
+    local luks_uuid
+    luks_uuid=$(cryptsetup luksUUID "$ROOT_DEV")
+    [ -n "$luks_uuid" ] || die "Could not read LUKS UUID from $ROOT_DEV"
+    log "LUKS UUID: $luks_uuid"
+
+    # encrypt_root leaves the mapping open. When encryption was skipped
+    # nothing in this script opens it, so fail with a clear message.
+    [ -b "/dev/mapper/$LUKS_NAME" ] \
+        || die "/dev/mapper/$LUKS_NAME is not open; cannot update the encrypted root"
+
+    # Mount the encrypted root to update its configuration
+    local mnt="/run/bootc-luks-mnt"
+    mkdir -p "$mnt"
+    mount "/dev/mapper/$LUKS_NAME" "$mnt"
+    # Unmount again even if one of the steps below aborts the script.
+    trap "umount '$mnt' 2>/dev/null || true" EXIT
+
+    # Write crypttab inside the ostree deploy directory. -quit stops at the
+    # first match, so no `| head -1` pipeline (and no SIGPIPE under
+    # pipefail) is needed. A missing tree falls through to the warning.
+    local deploy_etc
+    deploy_etc=$(find "$mnt/ostree/deploy" -maxdepth 4 -name "etc" -type d \
+        -print -quit 2>/dev/null) || true
+    if [ -n "$deploy_etc" ]; then
+        echo "$LUKS_NAME UUID=$luks_uuid - tpm2-device=auto" > "$deploy_etc/crypttab"
+        log "Written crypttab: $deploy_etc/crypttab"
+    else
+        log "WARNING: Could not find ostree deploy etc directory"
+    fi
+
+    # Update BLS entries. These may be on /boot (separate partition, already
+    # mounted by the initrd) or inside the encrypted root at /boot/loader/.
+    # Check both locations.
+    local updated=0
+    local entry
+    for entry in /boot/loader/entries/*.conf "$mnt"/boot/loader/entries/*.conf; do
+        [ -f "$entry" ] || continue
+        grep -qF "rd.bootc.luks.encrypt" "$entry" || continue
+        # Remove the first-boot trigger karg and add the LUKS unlock kargs.
+        # The root=UUID= karg stays unchanged -- once systemd-cryptsetup
+        # unlocks LUKS via rd.luks.uuid, the ext4 UUID inside becomes
+        # visible and root= resolves normally.
+        sed -i \
+            -e 's/ rd\.bootc\.luks\.encrypt=[^ ]*//' \
+            -e "s|^options |options rd.luks.uuid=$luks_uuid rd.luks.name=$luks_uuid=$LUKS_NAME rd.luks.options=$luks_uuid=tpm2-device=auto,headless=true |" \
+            "$entry"
+        updated=$((updated + 1))
+        log "Updated BLS entry: $entry"
+    done
+
+    if [ "$updated" -eq 0 ]; then
+        log "WARNING: No BLS entries found to update"
+    fi
+
+    umount "$mnt"
+    trap - EXIT
+}
+
+# Main
+parse_cmdline
+
+if [ -z "$ENCRYPT_KARG" ]; then
+    log "No encryption requested. Exiting."
+    exit 0
+fi
+
+# Only tpm2 enrollment is implemented. Refuse anything else before touching
+# the disk: encrypting without enrolling a key would leave the volume locked
+# behind a throwaway passphrase that nobody knows.
+if [ "$ENCRYPT_KARG" != "tpm2" ]; then
+    die "Unsupported rd.bootc.luks.encrypt method: $ENCRYPT_KARG"
+fi
+
+if [ -z "$ROOT_DEV" ]; then
+    die "rd.bootc.luks.encrypt set but no root= device found"
+fi
+
+if ! cryptsetup isLuks "$ROOT_DEV" 2>/dev/null; then
+    encrypt_root
+else
+    log "Root device $ROOT_DEV is already LUKS. Skipping encryption."
+fi
+
+# Always run configure_system when the karg is present. This is meant to
+# cover a previous boot that encrypted the device but was interrupted
+# before the BLS entries were updated.
+configure_system
+
+log "First-boot encryption complete."
diff --git a/crates/lib/src/install/baseline.rs b/crates/lib/src/install/baseline.rs
index 9b393b467..d1113b30c 100644
--- a/crates/lib/src/install/baseline.rs
+++ b/crates/lib/src/install/baseline.rs
@@ -8,7 +8,6 @@
 use std::borrow::Cow;
 use std::fmt::Display;
 use std::fmt::Write as _;
-use std::io::Write;
 use std::process::Command;
 use std::process::Stdio;
 
@@ -107,6 +106,32 @@ fn mkfs<'a>(
     label: &str,
     wipe: bool,
     opts: impl IntoIterator<Item = &'a str>,
+) -> Result<uuid::Uuid> {
+    mkfs_with_reserve(dev, fs, label, wipe, opts, 0)
+}
+
+/// Create a filesystem, optionally reserving trailing space on the device.
+///
+/// When `reserve_mib` is non-zero, the filesystem is created smaller than the
+/// partition by that amount. This is used to reserve space for a LUKS header
+/// that will be inserted on first boot via `cryptsetup reencrypt --encrypt`.
+///
+/// Returns an error if the device is smaller than the requested reserve.
+#[cfg(feature = "install-to-disk")]
+fn mkfs_with_reserve<'a>(
+    dev: &str,
+    fs: Filesystem,
+    label: &str,
+    wipe: bool,
+    opts: impl IntoIterator<Item = &'a str>,
+    reserve_mib: u32,
 ) -> Result<uuid::Uuid> {
     let devinfo = bootc_blockdev::list_dev(dev.into())?;
     let size = ostree_ext::glib::format_size(devinfo.size);
+    // Size the filesystem may occupy once the trailing reserve is carved out.
+    // Computed once, with a checked subtraction so that an undersized device
+    // produces an error instead of wrapping around.
+    let reserve_bytes = u64::from(reserve_mib) * 1024 * 1024;
+    let fs_bytes = devinfo.size.checked_sub(reserve_bytes).ok_or_else(|| {
+        anyhow::anyhow!("Device {dev} is too small to reserve {reserve_mib} MiB")
+    })?;
@@ -125,8 +150,18 @@ fn mkfs<'a>(
             }
             t.cmd.arg("-m");
             t.cmd.arg(format!("uuid={u}"));
+            if reserve_mib > 0 {
+                t.cmd.args(["-d", &format!("size={fs_bytes}")]);
+            }
         }
-        Filesystem::Btrfs | Filesystem::Ext4 => {
+        Filesystem::Btrfs => {
+            t.cmd.arg("-U");
+            t.cmd.arg(u.to_string());
+            if reserve_mib > 0 {
+                t.cmd.args(["-b", &fs_bytes.to_string()]);
+            }
+        }
+        Filesystem::Ext4 => {
             t.cmd.arg("-U");
             t.cmd.arg(u.to_string());
         }
@@ -135,6 +170,12 @@ fn mkfs<'a>(
     t.cmd.args(["-L", label]);
     t.cmd.args(opts);
     t.cmd.arg(dev);
+    // For ext4 the size is a positional argument after the device path. The
+    // explicit "K" suffix keeps the unit at KiB even if `opts` carries a `-b`
+    // block size, which would otherwise change how a bare number is read.
+    if reserve_mib > 0 && fs == Filesystem::Ext4 {
+        t.cmd.arg(format!("{}K", fs_bytes / 1024));
+    }
     // All the mkfs commands are unnecessarily noisy by default
     t.cmd.stdout(Stdio::null());
     // But this one is notable so let's print the whole thing with verbose()
@@ -172,7 +213,6 @@ pub(crate) fn install_create_rootfs(
     opts: InstallBlockDeviceOpts,
 ) -> Result<RootSetup> {
     let install_config = state.install_config.as_ref();
-    let luks_name = "root";
     // Ensure we have a root filesystem upfront
     let root_filesystem = opts
         .filesystem
@@ -347,44 +387,20 @@ pub(crate) fn install_create_rootfs(
             root_device.parttype.as_deref().unwrap_or("")
         );
     }
-    let (rootdev_path, root_blockdev_kargs) = match block_setup {
-        BlockSetup::Direct => (root_device.path(), None),
-        BlockSetup::Tpm2Luks => {
-            let uuid = uuid::Uuid::new_v4().to_string();
-            // This will be replaced via --wipe-slot=all when binding to tpm below
-            let dummy_passphrase = uuid::Uuid::new_v4().to_string();
-            let mut tmp_keyfile = tempfile::NamedTempFile::new()?;
-            tmp_keyfile.write_all(dummy_passphrase.as_bytes())?;
-            tmp_keyfile.flush()?;
-            let tmp_keyfile = tmp_keyfile.path();
-            let dummy_passphrase_input = Some(dummy_passphrase.as_bytes());
-
-            let root_devpath = root_device.path();
-
-            Task::new("Initializing LUKS for root", "cryptsetup")
-                .args(["luksFormat", "--uuid", uuid.as_str(), "--key-file"])
-                .args([tmp_keyfile])
-                .arg(&root_devpath)
-                .run()?;
-            // The --wipe-slot=all removes our temporary passphrase, and binds to the local TPM device.
-            // We also use .verbose() here as the details are important/notable.
-            Task::new("Enrolling root device with TPM", "systemd-cryptenroll")
-                .args(["--wipe-slot=all", "--tpm2-device=auto", "--unlock-key-file"])
-                .args([tmp_keyfile])
-                .arg(&root_devpath)
-                .verbose()
-                .run_with_stdin_buf(dummy_passphrase_input)?;
-            Task::new("Opening root LUKS device", "cryptsetup")
-                .args(["luksOpen", &root_devpath, luks_name])
-                .run()?;
-            let rootdev = format!("/dev/mapper/{luks_name}");
-            let kargs = vec![
-                format!("luks.uuid={uuid}"),
-                format!("luks.options=tpm2-device=auto,headless=true"),
-            ];
-            (rootdev, Some(kargs))
-        }
+    // For tpm2-luks, we reserve space for a LUKS header but do not perform
+    // any encryption during install. Encryption is deferred to first boot,
+    // where it runs on real hardware with access to the actual TPM device
+    // and correct firmware state (PCRs, shim version). This avoids both the
+    // IPC namespace deadlock (#2089) and the shim/PCR mismatch (#421).
+    let luks_reserve_mib: u32 = match block_setup {
+        BlockSetup::Direct => 0,
+        BlockSetup::Tpm2Luks => 32,
+    };
+    let root_blockdev_kargs = match block_setup {
+        BlockSetup::Direct => None,
+        BlockSetup::Tpm2Luks => Some(vec!["rd.bootc.luks.encrypt=tpm2".to_string()]),
     };
+    let rootdev_path = root_device.path();
 
     // Initialize the /boot filesystem
     let bootdev = if let Some(bootpn) = boot_partno {
@@ -407,13 +423,15 @@ pub(crate) fn install_create_rootfs(
         _ => [].as_slice(),
     };
 
-    // Initialize rootfs
-    let root_uuid = mkfs(
+    // Initialize rootfs. When encrypting on first boot, reserve trailing space
+    // for the LUKS header that cryptsetup reencrypt --encrypt will insert.
+    let root_uuid = mkfs_with_reserve(
         &rootdev_path,
         root_filesystem,
         "root",
         opts.wipe,
         mkfs_options.iter().copied(),
+        luks_reserve_mib,
     )?;
     let rootarg = format!("root=UUID={root_uuid}");
     let bootsrc = boot_uuid.as_ref().map(|uuid| format!("UUID={uuid}"));
@@ -474,10 +492,9 @@ pub(crate) fn install_create_rootfs(
         std::fs::create_dir(&efifs_path).context("Creating efi dir")?;
     }
 
-    let luks_device = match block_setup {
-        BlockSetup::Direct => None,
-        BlockSetup::Tpm2Luks => Some(luks_name.to_string()),
-    };
+    // With first-boot LUKS, no dm-crypt device is opened during install,
+    // so there is nothing to close. The root partition is written unencrypted.
+    let luks_device = None;
     Ok(RootSetup {
         luks_device,
         device_info: device,