diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fce8a9ca..6b23a878 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -111,6 +111,8 @@ jobs: target: i686-unknown-linux-gnu - rust: nightly target: hexagon-unknown-linux-musl + - rust: nightly-2023-08-23 # The last nightly version that doesn't support MaybeUninit registers. + target: hexagon-unknown-linux-musl - rust: '1.72' # inline asm for loongarch has been stabilized in Rust 1.72 target: loongarch64-unknown-linux-gnu - rust: stable @@ -163,6 +165,8 @@ jobs: target: powerpc64le-unknown-linux-gnu - rust: nightly target: riscv32gc-unknown-linux-gnu + - rust: nightly-2023-08-23 # The last nightly version that doesn't support MaybeUninit registers. + target: riscv32gc-unknown-linux-gnu - rust: '1.59' target: riscv64gc-unknown-linux-gnu - rust: stable @@ -246,6 +250,12 @@ jobs: RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+v8 RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+v8 if: matrix.target == 'armv7-unknown-linux-gnueabi' + # arm v6 with legacy cp15_barrier + - run: tools/test.sh -vv --tests $TARGET $BUILD_STD $RELEASE + env: + RUSTFLAGS: ${{ env.RUSTFLAGS }} --cfg atomic_maybe_uninit_use_cp15_barrier + RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} --cfg atomic_maybe_uninit_use_cp15_barrier + if: matrix.target == 'arm-unknown-linux-gnueabi' # arm v7 big endian # armeb-unknown-linux-gnueabi is v8 by default, use custom target instead - run: tools/test.sh -vv --tests --target armebv7-unknown-linux-gnueabi -Z build-std $RELEASE @@ -292,6 +302,8 @@ jobs: - '1.59' - stable - beta + - nightly-2023-08-23 # The last nightly version that doesn't support MaybeUninit registers. + - nightly-2023-08-24 # The oldest nightly version that supports MaybeUninit registers: https://github.com/rust-lang/rust/pull/114790 - nightly runs-on: ubuntu-latest timeout-minutes: 60 diff --git a/build.rs b/build.rs index e810b464..8c21dbd4 100644 --- a/build.rs +++ b/build.rs @@ -45,6 +45,10 @@ fn main() { if !version.probe(61, 2022, 3, 7) { println!("cargo:rustc-cfg=atomic_maybe_uninit_no_const_fn_trait_bound"); } + // https://github.com/rust-lang/rust/pull/114790 merged in nightly-2023-08-24 + if !version.probe(74, 2023, 8, 23) { + println!("cargo:rustc-cfg=atomic_maybe_uninit_no_asm_maybe_uninit"); + } match target_arch { "loongarch64" => { @@ -70,8 +74,8 @@ fn main() { _ => {} } - let is_apple = - target_os == "macos" || target_os == "ios" || target_os == "tvos" || target_os == "watchos"; + let is_macos = target_os == "macos"; + let is_apple = is_macos || target_os == "ios" || target_os == "tvos" || target_os == "watchos"; match target_arch { "x86_64" => { // x86_64 Apple targets always support CMPXCHG16B: @@ -126,7 +130,6 @@ fn main() { "aarch64" => { // aarch64 macOS always supports FEAT_LSE/FEAT_LSE2/FEAT_LRCPC because it is armv8.5-a: // https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/include/llvm/TargetParser/AArch64TargetParser.h#L494 - let is_macos = target_os == "macos"; let mut has_lse = is_macos; let mut has_rcpc = is_macos; // FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2. FEAT_LRCPC3 implies FEAT_LRCPC. 
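// ---- Annotation (not part of the patch): minimal sketch of the technique adopted below ----
// Assuming Rust 1.74+ / nightly-2023-08-24+ (rust-lang/rust#114790), `MaybeUninit<T>` can be
// passed directly as an `asm!` register operand, so the per-arch implementations rewritten in
// this patch can move values straight between the caller and registers instead of spilling
// them through `*const`/`*mut` temporaries; on older toolchains the
// `atomic_maybe_uninit_no_asm_maybe_uninit` cfg emitted above keeps the pointer-based path.
// The function below is illustrative only (hypothetical name, not part of the crate's API).

use core::{arch::asm, mem::MaybeUninit};

#[cfg(target_arch = "aarch64")]
#[inline]
unsafe fn relaxed_load_u32(src: *const MaybeUninit<u32>) -> MaybeUninit<u32> {
    let out: MaybeUninit<u32>;
    // SAFETY: the caller must guarantee that `src` is valid for reads and 4-byte aligned;
    // an aligned LDR is a single-copy atomic access on AArch64.
    unsafe {
        asm!(
            // (atomic) load from src directly into the MaybeUninit output register:
            // no `str` to a temporary and no separate `out` pointer operand needed
            "ldr {out:w}, [{src}]",
            src = in(reg) src,
            out = lateout(reg) out,
            options(nostack, preserves_flags),
        );
    }
    out
}
// ---- end annotation ----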
diff --git a/src/arch/aarch64.rs b/src/arch/aarch64.rs index 3714bee5..aa3f2614 100644 --- a/src/arch/aarch64.rs +++ b/src/arch/aarch64.rs @@ -14,15 +14,15 @@ // - portable-atomic https://github.com/taiki-e/portable-atomic // // Generated asm: -// - aarch64 https://godbolt.org/z/6TKofhrbb -// - aarch64 msvc https://godbolt.org/z/5GzETjcE7 -// - aarch64 (+lse) https://godbolt.org/z/7jK5vej7b -// - aarch64 msvc (+lse) https://godbolt.org/z/896zWazdW -// - aarch64 (+lse,+lse2) https://godbolt.org/z/66cMd4Ys6 -// - aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/ojbaYn9Kf -// - aarch64 (+rcpc) https://godbolt.org/z/4ahePW8TK -// - aarch64 (+lse2,+lse128) https://godbolt.org/z/joMq5vv1h -// - aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/WdbsccKcz +// - aarch64 https://godbolt.org/z/Yc7YK3cYa +// - aarch64 msvc https://godbolt.org/z/vYz67b7Pn +// - aarch64 (+lse) https://godbolt.org/z/vveGb37f6 +// - aarch64 msvc (+lse) https://godbolt.org/z/b5WxaMP76 +// - aarch64 (+lse,+lse2) https://godbolt.org/z/nhbs6aeWP +// - aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/zWjhaGM3o +// - aarch64 (+rcpc) https://godbolt.org/z/GWz6rYGn8 +// - aarch64 (+lse2,+lse128) https://godbolt.org/z/z5aTbbfEa +// - aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/rqa81e117 use core::{ arch::asm, @@ -30,7 +30,10 @@ use core::{ sync::atomic::Ordering, }; -use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; +use crate::{ + raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}, + utils::{MaybeUninit128, Pair}, +}; macro_rules! atomic_rmw { ($op:ident, $order:ident) => { @@ -59,24 +62,20 @@ macro_rules! atomic { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_load { ($acquire:tt) => { asm!( - // (atomic) load from src to tmp - concat!("ld", $acquire, "r", $asm_suffix, " {tmp", $val_modifier, "}, [{src}]"), - // store tmp to out - concat!("str", $asm_suffix, " {tmp", $val_modifier, "}, [{out}]"), + // (atomic) load from src to out + concat!("ld", $acquire, "r", $asm_suffix, " {out", $val_modifier, "}, [{src}]"), src = in(reg) ptr_reg!(src), - out = inout(reg) ptr_reg!(out) => _, - tmp = lateout(reg) _, + out = lateout(reg) out, options(nostack, preserves_flags), ) }; @@ -87,13 +86,10 @@ macro_rules! atomic { Ordering::Acquire => { // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC. asm!( - // (atomic) load from src to tmp - concat!("ldapr", $asm_suffix, " {tmp", $val_modifier, "}, [{src}]"), - // store tmp to out - concat!("str", $asm_suffix, " {tmp", $val_modifier, "}, [{out}]"), + // (atomic) load from src to out + concat!("ldapr", $asm_suffix, " {out", $val_modifier, "}, [{src}]"), src = in(reg) ptr_reg!(src), - out = inout(reg) ptr_reg!(out) => _, - tmp = lateout(reg) _, + out = lateout(reg) out, options(nostack, preserves_flags), ); } @@ -103,31 +99,28 @@ macro_rules! 
atomic { _ => unreachable!("{:?}", order), } } + out } } impl AtomicStore for $int_type { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_store { ($release:tt, $fence:tt) => { asm!( - // load from val to tmp - concat!("ldr", $asm_suffix, " {tmp", $val_modifier, "}, [{val}]"), - // (atomic) store tmp to dst - concat!("st", $release, "r", $asm_suffix, " {tmp", $val_modifier, "}, [{dst}]"), + // (atomic) store val to dst + concat!("st", $release, "r", $asm_suffix, " {val", $val_modifier, "}, [{dst}]"), $fence, - dst = inout(reg) ptr_reg!(dst) => _, - val = in(reg) ptr_reg!(val), - tmp = lateout(reg) _, + dst = in(reg) ptr_reg!(dst), + val = in(reg) val, options(nostack, preserves_flags), ) }; @@ -151,13 +144,11 @@ macro_rules! atomic { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -165,18 +156,12 @@ macro_rules! atomic { macro_rules! swap { ($acquire:tt, $release:tt, $fence:tt) => { asm!( - // load from val to tmp - concat!("ldr", $asm_suffix, " {tmp", $val_modifier, "}, [{val}]"), // (atomic) swap // Refs: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/SWPA--SWPAL--SWP--SWPL--SWPAL--SWP--SWPL - concat!("swp", $acquire, $release, $asm_suffix, " {tmp", $val_modifier, "}, {tmp", $val_modifier, "}, [{dst}]"), + concat!("swp", $acquire, $release, $asm_suffix, " {val", $val_modifier, "}, {val", $val_modifier, "}, [{dst}]"), $fence, - // store tmp to out - concat!("str", $asm_suffix, " {tmp", $val_modifier, "}, [{out}]"), - dst = inout(reg) ptr_reg!(dst) => _, - val = in(reg) ptr_reg!(val), - out = inout(reg) ptr_reg!(out) => _, - tmp = lateout(reg) _, + dst = in(reg) ptr_reg!(dst), + val = inout(reg) val => out, options(nostack, preserves_flags), ) }; @@ -185,48 +170,40 @@ macro_rules! atomic { macro_rules! 
swap { ($acquire:tt, $release:tt, $fence:tt) => { asm!( - // load from val to val_tmp - concat!("ldr", $asm_suffix, " {val_tmp", $val_modifier, "}, [{val}]"), // (atomic) swap (LL/SC loop) "2:", - // load from dst to out_tmp - concat!("ld", $acquire, "xr", $asm_suffix, " {out_tmp", $val_modifier, "}, [{dst}]"), + // load from dst to out + concat!("ld", $acquire, "xr", $asm_suffix, " {out", $val_modifier, "}, [{dst}]"), // try to store val to dst - concat!("st", $release, "xr", $asm_suffix, " {r:w}, {val_tmp", $val_modifier, "}, [{dst}]"), + concat!("st", $release, "xr", $asm_suffix, " {r:w}, {val", $val_modifier, "}, [{dst}]"), // 0 if the store was successful, 1 if no store was performed "cbnz {r:w}, 2b", $fence, - // store out_tmp to out - concat!("str", $asm_suffix, " {out_tmp", $val_modifier, "}, [{out}]"), - dst = inout(reg) ptr_reg!(dst) => _, - val = in(reg) ptr_reg!(val), - val_tmp = out(reg) _, - out = inout(reg) ptr_reg!(out) => _, - out_tmp = out(reg) _, - r = lateout(reg) _, + dst = in(reg) ptr_reg!(dst), + val = in(reg) val, + out = out(reg) out, + r = out(reg) _, options(nostack, preserves_flags), ) }; } atomic_rmw!(swap, order); } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -235,48 +212,37 @@ macro_rules! atomic { macro_rules! cmpxchg { ($acquire:tt, $release:tt, $fence:tt) => {{ asm!( - // load from old/new to old_tmp/new_tmp - concat!("ldr", $asm_suffix, " {old_tmp", $val_modifier, "}, [{old}]"), - concat!("ldr", $asm_suffix, " {new_tmp", $val_modifier, "}, [{new}]"), // cas writes the current value to the first register, // so copy the `old`'s value for later comparison. - concat!("mov {out_tmp", $val_modifier, "}, {old_tmp", $val_modifier, "}"), + concat!("mov {out", $val_modifier, "}, {old", $val_modifier, "}"), // (atomic) CAS // Refs: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/CASA--CASAL--CAS--CASL--CASAL--CAS--CASL - concat!("cas", $acquire, $release, $asm_suffix, " {out_tmp", $val_modifier, "}, {new_tmp", $val_modifier, "}, [{dst}]"), + concat!("cas", $acquire, $release, $asm_suffix, " {out", $val_modifier, "}, {new", $val_modifier, "}, [{dst}]"), $fence, - concat!("cmp {out_tmp", $val_modifier, "}, {old_tmp", $val_modifier, "}"), - // store out_tmp to out - concat!("str", $asm_suffix, " {out_tmp", $val_modifier, "}, [{out}]"), + concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}"), "cset {r:w}, eq", - dst = inout(reg) ptr_reg!(dst) => _, - old = in(reg) ptr_reg!(old), - old_tmp = out(reg) _, - new = in(reg) ptr_reg!(new), - new_tmp = out(reg) _, - out = inout(reg) ptr_reg!(out) => _, - out_tmp = out(reg) _, + dst = in(reg) ptr_reg!(dst), + old = in(reg) crate::utils::zero_extend(old), + new = in(reg) new, + out = out(reg) out, r = lateout(reg) r, // Do not use `preserves_flags` because CMP modifies the condition flags. 
options(nostack), ); debug_assert!(r == 0 || r == 1, "r={}", r); - r != 0 + (out, r != 0) }}; } #[cfg(not(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse")))] macro_rules! cmpxchg { ($acquire:tt, $release:tt, $fence:tt) => {{ asm!( - // load from old/new to old_tmp/new_tmp - concat!("ldr", $asm_suffix, " {new_tmp", $val_modifier, "}, [{new}]"), - concat!("ldr", $asm_suffix, " {old_tmp", $val_modifier, "}, [{old}]"), // (atomic) CAS (LL/SC loop) "2:", - concat!("ld", $acquire, "xr", $asm_suffix, " {out_tmp", $val_modifier, "}, [{dst}]"), - concat!("cmp {out_tmp", $val_modifier, "}, {old_tmp", $val_modifier, "}"), + concat!("ld", $acquire, "xr", $asm_suffix, " {out", $val_modifier, "}, [{dst}]"), + concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}"), "b.ne 3f", // jump if compare failed - concat!("st", $release, "xr", $asm_suffix, " {r:w}, {new_tmp", $val_modifier, "}, [{dst}]"), + concat!("st", $release, "xr", $asm_suffix, " {r:w}, {new", $val_modifier, "}, [{dst}]"), // 0 if the store was successful, 1 if no store was performed "cbnz {r:w}, 2b", // continue loop if store failed $fence, @@ -285,22 +251,17 @@ macro_rules! atomic { "mov {r:w}, #1", // mark as failed "clrex", "4:", - // store out_tmp to out - concat!("str", $asm_suffix, " {out_tmp", $val_modifier, "}, [{out}]"), - dst = inout(reg) ptr_reg!(dst) => _, - old = in(reg) ptr_reg!(old), - old_tmp = out(reg) _, - new = in(reg) ptr_reg!(new), - new_tmp = out(reg) _, - out = inout(reg) ptr_reg!(out) => _, - out_tmp = out(reg) _, - r = lateout(reg) r, + dst = in(reg) ptr_reg!(dst), + old = in(reg) crate::utils::zero_extend(old), + new = in(reg) new, + out = out(reg) out, + r = out(reg) r, // Do not use `preserves_flags` because CMP modifies the condition flags. options(nostack), ); debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (out, r == 0) }}; } atomic_rmw!(cmpxchg, order, write = success) @@ -310,17 +271,14 @@ macro_rules! atomic { #[inline] unsafe fn atomic_compare_exchange_weak( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -328,14 +286,11 @@ macro_rules! atomic { macro_rules! 
cmpxchg_weak { ($acquire:tt, $release:tt, $fence:tt) => { asm!( - // load from old/new to old_tmp/new_tmp - concat!("ldr", $asm_suffix, " {new_tmp", $val_modifier, "}, [{new}]"), - concat!("ldr", $asm_suffix, " {old_tmp", $val_modifier, "}, [{old}]"), // (atomic) CAS - concat!("ld", $acquire, "xr", $asm_suffix, " {out_tmp", $val_modifier, "}, [{dst}]"), - concat!("cmp {out_tmp", $val_modifier, "}, {old_tmp", $val_modifier, "}"), + concat!("ld", $acquire, "xr", $asm_suffix, " {out", $val_modifier, "}, [{dst}]"), + concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}"), "b.ne 3f", - concat!("st", $release, "xr", $asm_suffix, " {r:w}, {new_tmp", $val_modifier, "}, [{dst}]"), + concat!("st", $release, "xr", $asm_suffix, " {r:w}, {new", $val_modifier, "}, [{dst}]"), // TODO: only emit when the above sc succeed // // 0 if the store was successful, 1 if no store was performed // "cbnz {r:w}, 4f", @@ -345,16 +300,11 @@ macro_rules! atomic { "mov {r:w}, #1", "clrex", "4:", - // store out_tmp to out - concat!("str", $asm_suffix, " {out_tmp", $val_modifier, "}, [{out}]"), dst = inout(reg) ptr_reg!(dst) => _, - old = in(reg) ptr_reg!(old), - old_tmp = out(reg) _, - new = in(reg) ptr_reg!(new), - new_tmp = out(reg) _, - out = inout(reg) ptr_reg!(out) => _, - out_tmp = out(reg) _, - r = lateout(reg) r, + old = in(reg) crate::utils::zero_extend(old), + new = in(reg) new, + out = out(reg) out, + r = out(reg) r, // Do not use `preserves_flags` because CMP modifies the condition flags. options(nostack), ) @@ -363,7 +313,7 @@ macro_rules! atomic { atomic_rmw!(cmpxchg_weak, order, write = success); debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (out, r == 0) } } } @@ -421,11 +371,10 @@ macro_rules! atomic128 { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let (mut prev_lo, mut prev_hi); #[cfg(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2"))] // SAFETY: the caller must guarantee that `dst` is valid for reads, @@ -436,14 +385,11 @@ macro_rules! atomic128 { ($acquire:tt) => { asm!( // (atomic) load from src to tmp pair - "ldp {tmp_lo}, {tmp_hi}, [{src}]", + "ldp {prev_lo}, {prev_hi}, [{src}]", $acquire, - // store tmp pair to out - "stp {tmp_lo}, {tmp_hi}, [{out}]", src = in(reg) ptr_reg!(src), - out = in(reg) ptr_reg!(out), - tmp_hi = out(reg) _, - tmp_lo = out(reg) _, + prev_hi = lateout(reg) prev_hi, + prev_lo = lateout(reg) prev_lo, options(nostack, preserves_flags), ) }; @@ -456,13 +402,10 @@ macro_rules! atomic128 { // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-Pair-of-registers- asm!( // (atomic) load from src to tmp pair - "ldiapp {tmp_lo}, {tmp_hi}, [{src}]", - // store tmp pair to out - "stp {tmp_lo}, {tmp_hi}, [{out}]", + "ldiapp {prev_lo}, {prev_hi}, [{src}]", src = in(reg) ptr_reg!(src), - out = in(reg) ptr_reg!(out), - tmp_hi = out(reg) _, - tmp_lo = out(reg) _, + prev_hi = lateout(reg) prev_hi, + prev_lo = lateout(reg) prev_lo, options(nostack, preserves_flags), ); } @@ -474,20 +417,18 @@ macro_rules! 
atomic128 { // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891 "ldar {tmp}, [{src}]", // (atomic) load from src to tmp pair - "ldp {tmp_lo}, {tmp_hi}, [{src}]", + "ldp {prev_lo}, {prev_hi}, [{src}]", "dmb ishld", - // store tmp pair to out - "stp {tmp_lo}, {tmp_hi}, [{out}]", src = in(reg) ptr_reg!(src), - out = in(reg) ptr_reg!(out), - tmp_hi = out(reg) _, - tmp_lo = out(reg) _, + prev_hi = lateout(reg) prev_hi, + prev_lo = lateout(reg) prev_lo, tmp = out(reg) _, options(nostack, preserves_flags), ); }, _ => unreachable!("{:?}", order), } + MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } #[cfg(not(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2")))] // SAFETY: the caller must uphold the safety contract. @@ -501,13 +442,10 @@ macro_rules! atomic128 { // - https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/CASPA--CASPAL--CASP--CASPL--CASPAL--CASP--CASPL // - https://github.com/taiki-e/portable-atomic/pull/20 concat!("casp", $acquire, $release, " x2, x3, x2, x3, [{src}]"), - // store out pair to out - "stp x2, x3, [{out}]", src = in(reg) ptr_reg!(src), - out = in(reg) ptr_reg!(out), // must be allocated to even/odd register pair - inout("x2") 0_u64 => _, // out_lo - inout("x3") 0_u64 => _, // out_lo + inout("x2") 0_u64 => prev_lo, + inout("x3") 0_u64 => prev_hi, options(nostack, preserves_flags), ) }; @@ -518,18 +456,15 @@ macro_rules! atomic128 { asm!( // (atomic) load from src to tmp pair "2:", - // load from src to tmp pair - concat!("ld", $acquire, "xp {tmp_lo}, {tmp_hi}, [{src}]"), - // store tmp pair to src - concat!("st", $release, "xp {r:w}, {tmp_lo}, {tmp_hi}, [{src}]"), + // load from src to prev pair + concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{src}]"), + // store prev pair to src + concat!("st", $release, "xp {r:w}, {prev_lo}, {prev_hi}, [{src}]"), // 0 if the store was successful, 1 if no store was performed "cbnz {r:w}, 2b", - // store tmp pair to out - "stp {tmp_lo}, {tmp_hi}, [{out}]", src = in(reg) ptr_reg!(src), - out = in(reg) ptr_reg!(out), - tmp_hi = out(reg) _, - tmp_lo = out(reg) _, + prev_lo = out(reg) prev_lo, + prev_hi = out(reg) prev_hi, r = out(reg) _, options(nostack, preserves_flags), ) @@ -541,6 +476,7 @@ macro_rules! atomic128 { Ordering::SeqCst => atomic_load!("a", "l"), _ => unreachable!("{:?}", order), } + MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -548,11 +484,11 @@ macro_rules! atomic128 { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit128 { $int_type: val }; #[cfg(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2"))] // SAFETY: the caller must guarantee that `dst` is valid for writes, @@ -562,16 +498,13 @@ macro_rules! atomic128 { macro_rules! atomic_store { ($acquire:tt, $release:tt) => { asm!( - // load from val to val pair - "ldp {val_lo}, {val_hi}, [{val}]", // (atomic) store val pair to dst $release, "stp {val_lo}, {val_hi}, [{dst}]", $acquire, dst = in(reg) ptr_reg!(dst), - val = in(reg) ptr_reg!(val), - val_hi = out(reg) _, - val_lo = out(reg) _, + val_lo = in(reg) val.pair.lo, + val_hi = in(reg) val.pair.hi, options(nostack, preserves_flags), ) }; @@ -582,15 +515,12 @@ macro_rules! atomic128 { macro_rules! 
atomic_store_swpp { ($acquire:tt, $release:tt, $fence:tt) => { asm!( - // load from val to val pair - "ldp {val_lo}, {val_hi}, [{val}]", // (atomic) swap concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), $fence, dst = in(reg) ptr_reg!(dst), - val = in(reg) ptr_reg!(val), - val_hi = out(reg) _, - val_lo = out(reg) _, + val_lo = inout(reg) val.pair.lo => _, + val_hi = inout(reg) val.pair.hi => _, options(nostack, preserves_flags), ) }; @@ -602,14 +532,11 @@ macro_rules! atomic128 { // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3. // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/STILP--Store-Release-ordered-Pair-of-registers- asm!( - // load from val to val pair - "ldp {val_lo}, {val_hi}, [{val}]", // (atomic) store val pair to dst "stilp {val_lo}, {val_hi}, [{dst}]", dst = in(reg) ptr_reg!(dst), - val = in(reg) ptr_reg!(val), - val_hi = out(reg) _, - val_lo = out(reg) _, + val_lo = in(reg) val.pair.lo, + val_hi = in(reg) val.pair.hi, options(nostack, preserves_flags), ); } @@ -632,8 +559,6 @@ macro_rules! atomic128 { macro_rules! store { ($acquire:tt, $release:tt, $fence:tt) => { asm!( - // load from val to val pair - "ldp {val_lo}, {val_hi}, [{val}]", // (atomic) store val pair to dst (LL/SC loop) "2:", // load from dst to xzr/tmp pair @@ -643,11 +568,10 @@ macro_rules! atomic128 { // 0 if the store was successful, 1 if no store was performed "cbnz {tmp:w}, 2b", $fence, - dst = inout(reg) ptr_reg!(dst) => _, - val = in(reg) ptr_reg!(val), - val_hi = out(reg) _, - val_lo = out(reg) _, - tmp = lateout(reg) _, + dst = in(reg) ptr_reg!(dst), + val_lo = in(reg) val.pair.lo, + val_hi = in(reg) val.pair.hi, + tmp = out(reg) _, options(nostack, preserves_flags), ) }; @@ -660,13 +584,12 @@ macro_rules! atomic128 { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit128 { $int_type: val }; + let (mut prev_lo, mut prev_hi); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -674,18 +597,12 @@ macro_rules! atomic128 { macro_rules! swap { ($acquire:tt, $release:tt, $fence:tt) => { asm!( - // load from val to val pair - "ldp {val_lo}, {val_hi}, [{val}]", // (atomic) swap concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), $fence, - // store out pair to out - "stp {val_lo}, {val_hi}, [{out}]", dst = in(reg) ptr_reg!(dst), - val = in(reg) ptr_reg!(val), - out = in(reg) ptr_reg!(out), - val_hi = out(reg) _, - val_lo = out(reg) _, + val_lo = inout(reg) val.pair.lo => prev_lo, + val_hi = inout(reg) val.pair.hi => prev_hi, options(nostack, preserves_flags), ) }; @@ -694,32 +611,27 @@ macro_rules! atomic128 { macro_rules! 
swap { ($acquire:tt, $release:tt, $fence:tt) => { asm!( - // load from val to val pair - "ldp {val_lo}, {val_hi}, [{val}]", // (atomic) swap (LL/SC loop) "2:", - // load from dst to out pair - concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{dst}]"), + // load from dst to prev pair + concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // try to store val pair to dst concat!("st", $release, "xp {r:w}, {val_lo}, {val_hi}, [{dst}]"), // 0 if the store was successful, 1 if no store was performed "cbnz {r:w}, 2b", $fence, - // store out pair to out - "stp {out_lo}, {out_hi}, [{out}]", - dst = inout(reg) ptr_reg!(dst) => _, - val = in(reg) ptr_reg!(val), - out = inout(reg) ptr_reg!(out) => _, - val_hi = out(reg) _, - val_lo = out(reg) _, - out_hi = out(reg) _, - out_lo = out(reg) _, - r = lateout(reg) _, + dst = in(reg) ptr_reg!(dst), + val_lo = in(reg) val.pair.lo, + val_hi = in(reg) val.pair.hi, + prev_lo = out(reg) prev_lo, + prev_hi = out(reg) prev_hi, + r = out(reg) _, options(nostack, preserves_flags), ) }; } atomic_rmw!(swap, order); + MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -727,17 +639,16 @@ macro_rules! atomic128 { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); + let old = MaybeUninit128 { $int_type: old }; + let new = MaybeUninit128 { $int_type: new }; + let (mut prev_lo, mut prev_hi); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -746,9 +657,6 @@ macro_rules! atomic128 { macro_rules! cmpxchg { ($acquire:tt, $release:tt, $fence:tt) => {{ asm!( - // load from old/new to old/new pairs - "ldp {old_lo}, {old_hi}, [{old}]", - "ldp x4, x5, [{new}]", // casp writes the current value to the first register pair, // so copy the `old`'s value for later comparison. "mov x8, {old_lo}", @@ -757,48 +665,45 @@ macro_rules! atomic128 { // Refs: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/CASPA--CASPAL--CASP--CASPL--CASPAL--CASP--CASPL concat!("casp", $acquire, $release, " x8, x9, x4, x5, [{dst}]"), $fence, - // compare old pair and out pair + // compare old pair and prev pair "cmp x8, {old_lo}", "ccmp x9, {old_hi}, #0, eq", "cset {r:w}, eq", - // store out pair to out - "stp x8, x9, [{out}]", dst = in(reg) ptr_reg!(dst), - old = in(reg) ptr_reg!(old), - new = in(reg) ptr_reg!(new), - out = inout(reg) ptr_reg!(out) => _, - old_lo = out(reg) _, - old_hi = out(reg) _, + old_lo = in(reg) old.pair.lo, + old_hi = in(reg) old.pair.hi, r = lateout(reg) r, // new pair - must be allocated to even/odd register pair - out("x4") _, // new_lo - out("x5") _, // new_hi - // out pair - must be allocated to even/odd register pair - out("x8") _, // out_lo - out("x9") _, // out_hi + in("x4") new.pair.lo, + in("x5") new.pair.hi, + // prev pair - must be allocated to even/odd register pair + out("x8") prev_lo, + out("x9") prev_hi, // Do not use `preserves_flags` because CMP and CCMP modify the condition flags. 
options(nostack), ); debug_assert!(r == 0 || r == 1, "r={}", r); - r != 0 + ( + MaybeUninit128 { + pair: Pair { lo: prev_lo, hi: prev_hi } + }.$int_type, + r != 0 + ) }}; } #[cfg(not(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse")))] macro_rules! cmpxchg { ($acquire:tt, $release:tt, $fence:tt) => {{ asm!( - // load from old/new to old/new pair - "ldp {new_lo}, {new_hi}, [{new}]", - "ldp {old_lo}, {old_hi}, [{old}]", // (atomic) CAS (LL/SC loop) "2:", - concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{dst}]"), - "cmp {out_lo}, {old_lo}", + concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), + "cmp {prev_lo}, {old_lo}", "cset {r:w}, ne", - "cmp {out_hi}, {old_hi}", + "cmp {prev_hi}, {old_hi}", "cinc {r:w}, {r:w}, ne", "cbz {r:w}, 3f", // jump if compare succeed - concat!("st", $release, "xp {r:w}, {out_lo}, {out_hi}, [{dst}]"), + concat!("st", $release, "xp {r:w}, {prev_lo}, {prev_hi}, [{dst}]"), // 0 if the store was successful, 1 if no store was performed "cbnz {r:w}, 2b", // continue loop if store failed "mov {r:w}, #1", // mark as failed @@ -809,25 +714,25 @@ macro_rules! atomic128 { "cbnz {r:w}, 2b", // continue loop if store failed "4:", $fence, - // store out_tmp to out - "stp {out_lo}, {out_hi}, [{out}]", - dst = inout(reg) ptr_reg!(dst) => _, - old = in(reg) ptr_reg!(old), - old_hi = out(reg) _, - old_lo = out(reg) _, - new = in(reg) ptr_reg!(new), - new_hi = out(reg) _, - new_lo = out(reg) _, - out = inout(reg) ptr_reg!(out) => _, - out_hi = out(reg) _, - out_lo = out(reg) _, - r = lateout(reg) r, + dst = in(reg) ptr_reg!(dst), + old_lo = in(reg) old.pair.lo, + old_hi = in(reg) old.pair.hi, + new_lo = in(reg) new.pair.lo, + new_hi = in(reg) new.pair.hi, + prev_lo = out(reg) prev_lo, + prev_hi = out(reg) prev_hi, + r = out(reg) r, // Do not use `preserves_flags` because CMP modifies the condition flags. options(nostack), ); debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + ( + MaybeUninit128 { + pair: Pair { lo: prev_lo, hi: prev_hi } + }.$int_type, + r == 0 + ) }}; } atomic_rmw!(cmpxchg, order, write = success) diff --git a/src/arch/arm.rs b/src/arch/arm.rs index 02ad750d..43dbd953 100644 --- a/src/arch/arm.rs +++ b/src/arch/arm.rs @@ -9,11 +9,12 @@ // https://developer.arm.com/documentation/ddi0406/cb/Appendixes/ARMv6-Differences?lang=en // // Generated asm: -// - armv7-a https://godbolt.org/z/P93x9TjWs -// - armv7-r https://godbolt.org/z/1z9q9vTcd -// - armv7-m https://godbolt.org/z/WozEfbMbx -// - armv6 https://godbolt.org/z/T5M337jYK -// - armv6-m https://godbolt.org/z/q88qPah4W +// - armv7-a https://godbolt.org/z/Ws6o5ff6x +// - armv7-r https://godbolt.org/z/EasKf9c3q +// - armv7-m https://godbolt.org/z/Ef3hbfx7f +// - armv6 (cp15_barrier) https://godbolt.org/z/EMvbbTcGx +// - armv6 (__kuser_memory_barrier) https://godbolt.org/z/83z7oxxoh +// - armv6-m https://godbolt.org/z/7nnaEq5zP use core::{ mem::{self, MaybeUninit}, @@ -26,6 +27,8 @@ use core::{ ))] use crate::raw::{AtomicCompareExchange, AtomicSwap}; use crate::raw::{AtomicLoad, AtomicStore}; +#[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] +use crate::utils::{MaybeUninit64, Pair}; #[cfg(any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"))] #[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] @@ -34,23 +37,41 @@ macro_rules! 
dmb { "dmb ish" }; } -// ARMv6 does not support `dmb ish`, so use use special instruction equivalent to a DMB. +// Only a full system barrier exists in the M-class architectures. +#[cfg(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass"))] +macro_rules! dmb { + () => { + "dmb sy" + }; +} +// ARMv6 does not support `dmb`, so use use special instruction equivalent to a DMB. // // Refs: // - https://reviews.llvm.org/D5386 // - https://developer.arm.com/documentation/ddi0360/e/control-coprocessor-cp15/register-descriptions/c7--cache-operations-register?lang=en +#[cfg(not(all( + any(target_os = "linux", target_os = "android"), + not(atomic_maybe_uninit_use_cp15_barrier), +)))] #[cfg(not(any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7")))] #[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] macro_rules! dmb { () => { - "mcr p15, #0, r0, c7, c10, #5" + "mcr p15, #0, {zero}, c7, c10, #5" }; } -// Only a full system barrier exists in the M-class architectures. -#[cfg(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass"))] +// We prefer __kuser_memory_barrier over cp15_barrier because cp15_barrier is +// trapped and emulated by default on Linux/Android with ARMv8+ (or ARMv7+?). +// https://github.com/rust-lang/rust/issues/60605 +#[cfg(all( + any(target_os = "linux", target_os = "android"), + not(atomic_maybe_uninit_use_cp15_barrier), +))] +#[cfg(not(any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7")))] +#[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] macro_rules! dmb { () => { - "dmb sy" + "blx {kuser_memory_barrier}" }; } @@ -75,13 +96,9 @@ macro_rules! clrex { }; } -// On ARMv6, dmb! refers to r0, so when calling it, we must clobbering r0. macro_rules! asm_no_dmb { - (options($($options:tt)*), $($asm:tt)*) => { - core::arch::asm!( - $($asm)* - options($($options)*), - ) + ($($asm:tt)*) => { + core::arch::asm!($($asm)*) }; } #[cfg(any( @@ -91,13 +108,33 @@ macro_rules! asm_no_dmb { atomic_maybe_uninit_target_feature = "mclass", ))] macro_rules! asm_use_dmb { - (options($($options:tt)*), $($asm:tt)*) => { + ($($asm:tt)*) => { + core::arch::asm!($($asm)*) + }; +} +#[cfg(not(all( + any(target_os = "linux", target_os = "android"), + not(atomic_maybe_uninit_use_cp15_barrier), +)))] +#[cfg(not(any( + target_feature = "v7", + atomic_maybe_uninit_target_feature = "v7", + target_feature = "mclass", + atomic_maybe_uninit_target_feature = "mclass", +)))] +macro_rules! asm_use_dmb { + ($($asm:tt)*) => { + // In this case, dmb! calls `mcr p15, 0, , c7, c10, 5`, and the value in the Rd register should be zero (SBZ). core::arch::asm!( $($asm)* - options($($options)*), + zero = inout(reg) 0_u32 => _, ) }; } +#[cfg(all( + any(target_os = "linux", target_os = "android"), + not(atomic_maybe_uninit_use_cp15_barrier), +))] #[cfg(not(any( target_feature = "v7", atomic_maybe_uninit_target_feature = "v7", @@ -105,11 +142,14 @@ macro_rules! asm_use_dmb { atomic_maybe_uninit_target_feature = "mclass", )))] macro_rules! asm_use_dmb { - (options($($options:tt)*), $($asm:tt)*) => { + ($($asm:tt)*) => { + // In this case, dmb! calls __kuser_memory_barrier. 
core::arch::asm!( $($asm)* - inout("r0") 0_u32 => _, - options($($options)*), + // __kuser_memory_barrier (see also arm_linux.rs) + // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt + kuser_memory_barrier = inout(reg) 0xFFFF0FA0_usize => _, + out("lr") _, ) }; } @@ -120,26 +160,22 @@ macro_rules! atomic { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_load { ($asm:ident, $acquire:expr) => { $asm!( - options(nostack, preserves_flags), - // (atomic) load from src to tmp - concat!("ldr", $asm_suffix, " {tmp}, [{src}]"), + // (atomic) load from src to out + concat!("ldr", $asm_suffix, " {out}, [{src}]"), $acquire, // acquire fence - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), src = in(reg) src, - out = inout(reg) out => _, - tmp = lateout(reg) _, + out = lateout(reg) out, + options(nostack, preserves_flags), ) }; } @@ -150,33 +186,30 @@ macro_rules! atomic { _ => unreachable!("{:?}", order), } } + out } } impl AtomicStore for $int_type { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_store { ($asm:ident, $acquire:expr, $release:expr) => { $asm!( - options(nostack, preserves_flags), - // load from val to tmp - concat!("ldr", $asm_suffix, " {tmp}, [{val}]"), - // (atomic) store tmp to dst + // (atomic) store val to dst $release, // release fence - concat!("str", $asm_suffix, " {tmp}, [{dst}]"), + concat!("str", $asm_suffix, " {val}, [{dst}]"), $acquire, // acquire fence - dst = inout(reg) dst => _, + dst = in(reg) dst, val = in(reg) val, - tmp = lateout(reg) _, + options(nostack, preserves_flags), ) }; } @@ -197,41 +230,34 @@ macro_rules! atomic { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_swap { ($asm:ident, $acquire:expr, $release:expr) => { $asm!( - // Do not use `preserves_flags` because CMP modifies the condition flags. 
- options(nostack), - // load from val (ptr) to val (val) - concat!("ldr", $asm_suffix, " {val}, [{val}]"), // (atomic) swap (LL/SC loop) $release, // release fence "2:", - // load from dst to tmp - concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), + // load from dst to out + concat!("ldrex", $asm_suffix, " {out}, [{dst}]"), // try to store val to dst concat!("strex", $asm_suffix, " {r}, {val}, [{dst}]"), // 0 if the store was successful, 1 if no store was performed "cmp {r}, 0x0", "bne 2b", $acquire, // acquire fence - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), dst = in(reg) dst, val = inout(reg) val => _, - out = in(reg) out, + out = out(reg) out, r = out(reg) _, - tmp = out(reg) _, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), ) }; } @@ -246,6 +272,7 @@ macro_rules! atomic { _ => unreachable!("{:?}", order), } } + out } } #[rustfmt::skip] @@ -257,16 +284,13 @@ macro_rules! atomic { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -275,15 +299,10 @@ macro_rules! atomic { macro_rules! cmpxchg_store_relaxed { ($asm:ident, $acquire_success:expr, $acquire_failure:expr) => { $asm!( - // Do not use `preserves_flags` because CMP modifies the condition flags. - options(nostack), - // load from old/new (ptr) to old/new (val) - concat!("ldr", $asm_suffix, " {old}, [{old}]"), - concat!("ldr", $asm_suffix, " {new}, [{new}]"), // (atomic) CAS (LL/SC loop) "2:", - concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), - "cmp {tmp}, {old}", + concat!("ldrex", $asm_suffix, " {out}, [{dst}]"), + "cmp {out}, {old}", "bne 3f", // jump if compare failed concat!("strex", $asm_suffix, " {r}, {new}, [{dst}]"), // 0 if the store was successful, 1 if no store was performed @@ -297,28 +316,22 @@ macro_rules! atomic { clrex!(), $acquire_failure, "4:", - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), dst = in(reg) dst, - r = out(reg) r, - old = inout(reg) old => _, + old = in(reg) crate::utils::zero_extend(old), new = inout(reg) new => _, - out = in(reg) out, - tmp = out(reg) _, + out = out(reg) out, + r = out(reg) r, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), ) }; } macro_rules! cmpxchg_release { ($acquire_failure:expr) => { asm_use_dmb!( - // Do not use `preserves_flags` because CMP modifies the condition flags. - options(nostack), - // load from old/new (ptr) to old/new (val) - concat!("ldr", $asm_suffix, " {old}, [{old}]"), - concat!("ldr", $asm_suffix, " {new}, [{new}]"), // (atomic) CAS (LL/SC loop) - concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), - "cmp {tmp}, {old}", + concat!("ldrex", $asm_suffix, " {out}, [{dst}]"), + "cmp {out}, {old}", "bne 3f", // jump if compare failed dmb!(), // release "2:", @@ -326,8 +339,8 @@ macro_rules! 
atomic { // 0 if the store was successful, 1 if no store was performed "cmp {r}, #0", "beq 4f", // jump if store succeed - concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), - "cmp {tmp}, {old}", + concat!("ldrex", $asm_suffix, " {out}, [{dst}]"), + "cmp {out}, {old}", "beq 2b", // continue loop if compare succeed "3:", // compare failed, set r to 1 @@ -335,28 +348,22 @@ macro_rules! atomic { clrex!(), $acquire_failure, "4:", - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), dst = in(reg) dst, - r = out(reg) r, - old = inout(reg) old => _, + old = in(reg) crate::utils::zero_extend(old), new = inout(reg) new => _, - out = in(reg) out, - tmp = out(reg) _, + out = out(reg) out, + r = out(reg) r, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), ) }; } macro_rules! cmpxchg_acqrel { ($acquire_failure:expr) => { asm_use_dmb!( - // Do not use `preserves_flags` because CMP modifies the condition flags. - options(nostack), - // load from old/new (ptr) to old/new (val) - concat!("ldr", $asm_suffix, " {old}, [{old}]"), - concat!("ldr", $asm_suffix, " {new}, [{new}]"), // (atomic) CAS (LL/SC loop) - concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), - "cmp {tmp}, {old}", + concat!("ldrex", $asm_suffix, " {out}, [{dst}]"), + "cmp {out}, {old}", "bne 3f", // jump if compare failed dmb!(), // release "2:", @@ -364,8 +371,8 @@ macro_rules! atomic { // 0 if the store was successful, 1 if no store was performed "cmp {r}, #0", "beq 4f", // jump if store succeed - concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), - "cmp {tmp}, {old}", + concat!("ldrex", $asm_suffix, " {out}, [{dst}]"), + "cmp {out}, {old}", "beq 2b", // continue loop if compare succeed "3:", // compare failed, set r to 1 @@ -376,14 +383,13 @@ macro_rules! atomic { "4:", // store succeed dmb!(), // acquire_success "5:", - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), dst = in(reg) dst, - r = out(reg) r, - old = inout(reg) old => _, + old = in(reg) crate::utils::zero_extend(old), new = inout(reg) new => _, - out = in(reg) out, - tmp = out(reg) _, + out = out(reg) out, + r = out(reg) r, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), ) }; } @@ -405,22 +411,19 @@ macro_rules! atomic { } debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (out, r == 0) } } #[inline] unsafe fn atomic_compare_exchange_weak( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -429,13 +432,8 @@ macro_rules! atomic { macro_rules! cmpxchg_weak { ($asm:ident, $acquire:expr, $release:expr) => { $asm!( - // Do not use `preserves_flags` because CMP modifies the condition flags. 
- options(nostack), - // load from old/new (ptr) to old/new (val) - concat!("ldr", $asm_suffix, " {old}, [{old}]"), - concat!("ldr", $asm_suffix, " {new}, [{new}]"), - concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), - "cmp {tmp}, {old}", + concat!("ldrex", $asm_suffix, " {out}, [{dst}]"), + "cmp {out}, {old}", "bne 3f", // jump if compare failed $release, concat!("strex", $asm_suffix, " {r}, {new}, [{dst}]"), @@ -446,27 +444,21 @@ macro_rules! atomic { clrex!(), "4:", $acquire, - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), dst = in(reg) dst, - r = out(reg) r, - old = inout(reg) old => _, + old = in(reg) crate::utils::zero_extend(old), new = inout(reg) new => _, - out = in(reg) out, - tmp = out(reg) _, + out = out(reg) out, + r = out(reg) r, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), ) }; } macro_rules! cmpxchg_weak_fail_load_relaxed { ($release:expr) => { asm_use_dmb!( - // Do not use `preserves_flags` because CMP modifies the condition flags. - options(nostack), - // load from old/new (ptr) to old/new (val) - concat!("ldr", $asm_suffix, " {old}, [{old}]"), - concat!("ldr", $asm_suffix, " {new}, [{new}]"), - concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), - "cmp {tmp}, {old}", + concat!("ldrex", $asm_suffix, " {out}, [{dst}]"), + "cmp {out}, {old}", "bne 3f", // jump if compare failed $release, concat!("strex", $asm_suffix, " {r}, {new}, [{dst}]"), @@ -482,27 +474,21 @@ macro_rules! atomic { "4:", // store succeed dmb!(), // acquire_success "5:", - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), dst = in(reg) dst, - r = out(reg) r, - old = inout(reg) old => _, + old = in(reg) crate::utils::zero_extend(old), new = inout(reg) new => _, - out = in(reg) out, - tmp = out(reg) _, + out = out(reg) out, + r = out(reg) r, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), ) }; } macro_rules! cmpxchg_weak_success_load_relaxed { ($release:expr) => { asm_use_dmb!( - // Do not use `preserves_flags` because CMP modifies the condition flags. - options(nostack), - // load from old/new (ptr) to old/new (val) - concat!("ldr", $asm_suffix, " {old}, [{old}]"), - concat!("ldr", $asm_suffix, " {new}, [{new}]"), - concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), - "cmp {tmp}, {old}", + concat!("ldrex", $asm_suffix, " {out}, [{dst}]"), + "cmp {out}, {old}", "bne 3f", // jump if compare failed $release, concat!("strex", $asm_suffix, " {r}, {new}, [{dst}]"), @@ -517,14 +503,13 @@ macro_rules! atomic { "4:", // compare or store failed dmb!(), // acquire_failure "5:", - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), dst = in(reg) dst, - r = out(reg) r, - old = inout(reg) old => _, + old = in(reg) crate::utils::zero_extend(old), new = inout(reg) new => _, - out = in(reg) out, - tmp = out(reg) _, + out = out(reg) out, + r = out(reg) r, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), ) }; } @@ -542,7 +527,7 @@ macro_rules! atomic { } debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (out, r == 0) } } } @@ -569,29 +554,25 @@ macro_rules! 
atomic64 { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let (prev_lo, prev_hi); // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_load { ($asm:ident, $acquire:expr) => { $asm!( - options(nostack, preserves_flags), // (atomic) load from src to tmp pair "ldrexd r2, r3, [{src}]", clrex!(), $acquire, // acquire fence - // store tmp pair to out - "strd r2, r3, [{out}]", src = in(reg) src, - out = in(reg) out, - // tmp pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, + // prev pair - must be even-numbered and not R14 + out("r2") prev_lo, + out("r3") prev_hi, + options(nostack, preserves_flags), ) }; } @@ -601,6 +582,7 @@ macro_rules! atomic64 { Ordering::Acquire | Ordering::SeqCst => atomic_load!(asm_use_dmb, dmb!()), _ => unreachable!("{:?}", order), } + MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -609,41 +591,37 @@ macro_rules! atomic64 { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit64 { $int_type: val }; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_store { ($asm:ident, $acquire:expr, $release:expr) => { $asm!( - // Do not use `preserves_flags` because CMP modifies the condition flags. - options(nostack), - // load from val to val pair - "ldrd r2, r3, [{val}]", // (atomic) store val pair to dst (LL/SC loop) $release, // release fence "2:", // load from dst to tmp pair "ldrexd r4, r5, [{dst}]", // try to store val pair to dst - "strexd {r}, r2, r3, [{dst}]", + "strexd r4, r2, r3, [{dst}]", // 0 if the store was successful, 1 if no store was performed - "cmp {r}, 0x0", + "cmp r4, 0x0", "bne 2b", $acquire, // acquire fence - dst = inout(reg) dst => _, - val = in(reg) val, - r = lateout(reg) _, + dst = in(reg) dst, // val pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, + inout("r2") val.pair.lo => _, + inout("r3") val.pair.hi => _, // tmp pair - must be even-numbered and not R14 out("r4") _, out("r5") _, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), ) }; } @@ -661,27 +639,22 @@ macro_rules! atomic64 { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit64 { $int_type: val }; + let (mut prev_lo, mut prev_hi); // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_swap { ($asm:ident, $acquire:expr, $release:expr) => { $asm!( - // Do not use `preserves_flags` because CMP modifies the condition flags. 
- options(nostack), - // load from val to val pair - "ldrd r2, r3, [{val}]", // (atomic) swap (LL/SC loop) $release, // release fence "2:", - // load from dst to out pair + // load from dst to prev pair "ldrexd r4, r5, [{dst}]", // try to store val pair to dst "strexd {r}, r2, r3, [{dst}]", @@ -689,18 +662,16 @@ macro_rules! atomic64 { "cmp {r}, 0x0", "bne 2b", $acquire, // acquire fence - // store out pair to out - "strd r4, r5, [{out}]", - dst = inout(reg) dst => _, - val = in(reg) val, - out = inout(reg) out => _, - r = lateout(reg) _, + dst = in(reg) dst, + r = out(reg) _, // val pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, - // out pair - must be even-numbered and not R14 - out("r4") _, - out("r5") _, + inout("r2") val.pair.lo => _, + inout("r3") val.pair.hi => _, + // prev pair - must be even-numbered and not R14 + out("r4") prev_lo, + out("r5") prev_hi, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), ) }; } @@ -712,6 +683,7 @@ macro_rules! atomic64 { Ordering::AcqRel | Ordering::SeqCst => atomic_swap!(asm_use_dmb, dmb!(), dmb!()), _ => unreachable!("{:?}", order), } + MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -720,16 +692,15 @@ macro_rules! atomic64 { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let old = MaybeUninit64 { $int_type: old }; + let new = MaybeUninit64 { $int_type: new }; + let (mut prev_lo, mut prev_hi); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -738,15 +709,11 @@ macro_rules! atomic64 { macro_rules! cmpxchg_store_relaxed { ($asm:ident, $acquire_success:expr, $acquire_failure:expr) => { $asm!( - // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. - options(nostack), - "ldrd r2, r3, [{old}]", - "ldrd r8, r9, [{new}]", // (atomic) CAS (LL/SC loop) "2:", "ldrexd r4, r5, [{dst}]", - "eor {tmp}, r5, r3", - "eor {r}, r4, r2", + "eor {tmp}, r5, {old_hi}", + "eor {r}, r4, {old_lo}", "orrs {r}, {r}, {tmp}", "bne 3f", // jump if compare failed "strexd {r}, r8, r9, [{dst}]", @@ -761,37 +728,29 @@ macro_rules! atomic64 { clrex!(), $acquire_failure, "4:", - // store out pair to out - "strd r4, r5, [{out}]", - dst = inout(reg) dst => _, - r = lateout(reg) r, - old = in(reg) old, - new = in(reg) new, - out = inout(reg) out => _, + dst = in(reg) dst, + old_lo = in(reg) old.pair.lo, + old_hi = in(reg) old.pair.hi, + r = out(reg) r, tmp = out(reg) _, - // old pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, - // out pair - must be even-numbered and not R14 - out("r4") _, - out("r5") _, + // prev pair - must be even-numbered and not R14 + out("r4") prev_lo, + out("r5") prev_hi, // new pair - must be even-numbered and not R14 - out("r8") _, - out("r9") _, + inout("r8") new.pair.lo => _, + inout("r9") new.pair.hi => _, + // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. + options(nostack), ) }; } macro_rules! 
cmpxchg_release { ($acquire_failure:expr) => { asm_use_dmb!( - // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. - options(nostack), - "ldrd r2, r3, [{old}]", - "ldrd r8, r9, [{new}]", // (atomic) CAS (LL/SC loop) "ldrexd r4, r5, [{dst}]", - "eor {tmp}, r5, r3", - "eor {r}, r4, r2", + "eor {tmp}, r5, {old_hi}", + "eor {r}, r4, {old_lo}", "orrs {r}, {r}, {tmp}", "bne 3f", // jump if compare failed dmb!(), // release @@ -801,8 +760,8 @@ macro_rules! atomic64 { "cmp {r}, #0", "beq 4f", // jump if store succeed "ldrexd r4, r5, [{dst}]", - "eor {tmp}, r5, r3", - "eor {r}, r4, r2", + "eor {tmp}, r5, {old_hi}", + "eor {r}, r4, {old_lo}", "orrs {r}, {r}, {tmp}", "beq 2b", // continue loop if compare succeed "3:", @@ -811,37 +770,29 @@ macro_rules! atomic64 { clrex!(), $acquire_failure, "4:", - // store out pair to out - "strd r4, r5, [{out}]", - dst = inout(reg) dst => _, - r = lateout(reg) r, - old = in(reg) old, - new = in(reg) new, - out = inout(reg) out => _, + dst = in(reg) dst, + old_lo = in(reg) old.pair.lo, + old_hi = in(reg) old.pair.hi, + r = out(reg) r, tmp = out(reg) _, - // old pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, - // out pair - must be even-numbered and not R14 - out("r4") _, - out("r5") _, + // prev pair - must be even-numbered and not R14 + out("r4") prev_lo, + out("r5") prev_hi, // new pair - must be even-numbered and not R14 - out("r8") _, - out("r9") _, + inout("r8") new.pair.lo => _, + inout("r9") new.pair.hi => _, + // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. + options(nostack), ) }; } macro_rules! cmpxchg_acqrel { ($acquire_failure:expr) => { asm_use_dmb!( - // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. - options(nostack), - "ldrd r2, r3, [{old}]", - "ldrd r8, r9, [{new}]", // (atomic) CAS (LL/SC loop) "ldrexd r4, r5, [{dst}]", - "eor {tmp}, r5, r3", - "eor {r}, r4, r2", + "eor {tmp}, r5, {old_hi}", + "eor {r}, r4, {old_lo}", "orrs {r}, {r}, {tmp}", "bne 3f", // jump if compare failed dmb!(), // release @@ -851,8 +802,8 @@ macro_rules! atomic64 { "cmp {r}, #0", "beq 4f", // jump if store succeed "ldrexd r4, r5, [{dst}]", - "eor {tmp}, r5, r3", - "eor {r}, r4, r2", + "eor {tmp}, r5, {old_hi}", + "eor {r}, r4, {old_lo}", "orrs {r}, {r}, {tmp}", "beq 2b", // continue loop if compare succeed "3:", @@ -864,23 +815,19 @@ macro_rules! atomic64 { "4:", // store succeed dmb!(), // acquire_success "5:", - // store out pair to out - "strd r4, r5, [{out}]", - dst = inout(reg) dst => _, - r = lateout(reg) r, - old = in(reg) old, - new = in(reg) new, - out = inout(reg) out => _, + dst = in(reg) dst, + old_lo = in(reg) old.pair.lo, + old_hi = in(reg) old.pair.hi, + r = out(reg) r, tmp = out(reg) _, - // old pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, - // out pair - must be even-numbered and not R14 - out("r4") _, - out("r5") _, + // prev pair - must be even-numbered and not R14 + out("r4") prev_lo, + out("r5") prev_hi, // new pair - must be even-numbered and not R14 - out("r8") _, - out("r9") _, + inout("r8") new.pair.lo => _, + inout("r9") new.pair.hi => _, + // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. + options(nostack), ) }; } @@ -898,22 +845,21 @@ macro_rules! 
atomic64 { } debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type, r == 0) } } #[inline] unsafe fn atomic_compare_exchange_weak( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let old = MaybeUninit64 { $int_type: old }; + let new = MaybeUninit64 { $int_type: new }; + let (mut prev_lo, mut prev_hi); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -922,13 +868,9 @@ macro_rules! atomic64 { macro_rules! cmpxchg_weak { ($asm:ident, $acquire:expr, $release:expr) => { $asm!( - // Do not use `preserves_flags` because ORRS modifies the condition flags. - options(nostack), - "ldrd r2, r3, [{old}]", - "ldrd r8, r9, [{new}]", "ldrexd r4, r5, [{dst}]", - "eor {tmp}, r5, r3", - "eor {r}, r4, r2", + "eor {tmp}, r5, {old_hi}", + "eor {r}, r4, {old_lo}", "orrs {r}, {r}, {tmp}", "bne 3f", // jump if compare failed $release, @@ -940,36 +882,28 @@ macro_rules! atomic64 { clrex!(), "4:", $acquire, - // store out pair to out - "strd r4, r5, [{out}]", - dst = inout(reg) dst => _, - r = lateout(reg) r, - old = in(reg) old, - new = in(reg) new, - out = inout(reg) out => _, + dst = in(reg) dst, + old_lo = in(reg) old.pair.lo, + old_hi = in(reg) old.pair.hi, + r = out(reg) r, tmp = out(reg) _, - // old pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, - // out pair - must be even-numbered and not R14 - out("r4") _, - out("r5") _, + // prev pair - must be even-numbered and not R14 + out("r4") prev_lo, + out("r5") prev_hi, // new pair - must be even-numbered and not R14 - out("r8") _, - out("r9") _, + inout("r8") new.pair.lo => _, + inout("r9") new.pair.hi => _, + // Do not use `preserves_flags` because ORRS modifies the condition flags. + options(nostack), ) }; } macro_rules! cmpxchg_weak_fail_load_relaxed { ($release:expr) => { asm_use_dmb!( - // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. - options(nostack), - "ldrd r2, r3, [{old}]", - "ldrd r8, r9, [{new}]", "ldrexd r4, r5, [{dst}]", - "eor {tmp}, r5, r3", - "eor {r}, r4, r2", + "eor {tmp}, r5, {old_hi}", + "eor {r}, r4, {old_lo}", "orrs {r}, {r}, {tmp}", "bne 3f", // jump if compare failed $release, @@ -986,36 +920,28 @@ macro_rules! 
atomic64 { "4:", // store succeed dmb!(), // acquire_success "5:", - // store out pair to out - "strd r4, r5, [{out}]", - dst = inout(reg) dst => _, - r = lateout(reg) r, - old = in(reg) old, - new = in(reg) new, - out = inout(reg) out => _, + dst = in(reg) dst, + old_lo = in(reg) old.pair.lo, + old_hi = in(reg) old.pair.hi, + r = out(reg) r, tmp = out(reg) _, - // old pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, - // out pair - must be even-numbered and not R14 - out("r4") _, - out("r5") _, + // prev pair - must be even-numbered and not R14 + out("r4") prev_lo, + out("r5") prev_hi, // new pair - must be even-numbered and not R14 - out("r8") _, - out("r9") _, + inout("r8") new.pair.lo => _, + inout("r9") new.pair.hi => _, + // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. + options(nostack), ) }; } macro_rules! cmpxchg_weak_success_load_relaxed { ($release:expr) => { asm_use_dmb!( - // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. - options(nostack), - "ldrd r2, r3, [{old}]", - "ldrd r8, r9, [{new}]", "ldrexd r4, r5, [{dst}]", - "eor {tmp}, r5, r3", - "eor {r}, r4, r2", + "eor {tmp}, r5, {old_hi}", + "eor {r}, r4, {old_lo}", "orrs {r}, {r}, {tmp}", "bne 3f", // jump if compare failed $release, @@ -1031,23 +957,19 @@ macro_rules! atomic64 { "4:", // compare or store failed dmb!(), // acquire_failure "5:", - // store out pair to out - "strd r4, r5, [{out}]", - dst = inout(reg) dst => _, - r = lateout(reg) r, - old = in(reg) old, - new = in(reg) new, - out = inout(reg) out => _, + dst = in(reg) dst, + old_lo = in(reg) old.pair.lo, + old_hi = in(reg) old.pair.hi, + r = out(reg) r, tmp = out(reg) _, - // old pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, - // out pair - must be even-numbered and not R14 - out("r4") _, - out("r5") _, + // prev pair - must be even-numbered and not R14 + out("r4") prev_lo, + out("r5") prev_hi, // new pair - must be even-numbered and not R14 - out("r8") _, - out("r9") _, + inout("r8") new.pair.lo => _, + inout("r9") new.pair.hi => _, + // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. + options(nostack), ) }; } @@ -1065,7 +987,7 @@ macro_rules! atomic64 { } debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type, r == 0) } } } diff --git a/src/arch/arm_linux.rs b/src/arch/arm_linux.rs index 09546f0a..dcbffc38 100644 --- a/src/arch/arm_linux.rs +++ b/src/arch/arm_linux.rs @@ -9,8 +9,8 @@ // https://developer.arm.com/documentation/ddi0406/cb/Appendixes/ARMv4-and-ARMv5-Differences?lang=en // // Generated asm: -// - armv5te https://godbolt.org/z/r61s7cnG8 -// - armv4t https://godbolt.org/z/xrxfKx1rc +// - armv5te https://godbolt.org/z/63Kojd799 +// - armv4t https://godbolt.org/z/M9Trn87To #[path = "partword.rs"] mod partword; @@ -21,7 +21,10 @@ use core::{ sync::atomic::Ordering, }; -use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; +use crate::{ + raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}, + utils::MaybeUninit64, +}; type XSize = usize; @@ -60,24 +63,20 @@ macro_rules! 
atomic_load_store { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { match order { Ordering::Relaxed => { asm!( - // (atomic) load from src to tmp - concat!("ldr", $asm_suffix, " {tmp}, [{src}]"), - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), + // (atomic) load from src to out + concat!("ldr", $asm_suffix, " {out}, [{src}]"), src = in(reg) src, - out = inout(reg) out => _, - tmp = lateout(reg) _, + out = lateout(reg) out, options(nostack, preserves_flags), ); } @@ -85,14 +84,11 @@ macro_rules! atomic_load_store { Ordering::Acquire | Ordering::SeqCst => { debug_assert!(kuser_helper_version() >= 3); asm!( - // (atomic) load from src to tmp - concat!("ldr", $asm_suffix, " {tmp}, [{src}]"), + // (atomic) load from src to out + concat!("ldr", $asm_suffix, " {out}, [{src}]"), blx!("{kuser_memory_barrier}"), // acquire fence - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), src = in(reg) src, - out = inout(reg) out => _, - tmp = lateout(reg) _, + out = lateout(reg) out, kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _, out("lr") _, options(nostack, preserves_flags), @@ -101,17 +97,17 @@ macro_rules! atomic_load_store { _ => unreachable!("{:?}", order), } } + out } } impl AtomicStore for $int_type { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -119,16 +115,13 @@ macro_rules! atomic_load_store { ($acquire:expr) => {{ debug_assert!(kuser_helper_version() >= 3); asm!( - // load from val to tmp - concat!("ldr", $asm_suffix, " {tmp}, [{val}]"), - // (atomic) store tmp to dst + // (atomic) store val to dst blx!("{kuser_memory_barrier}"), // release fence - concat!("str", $asm_suffix, " {tmp}, [{dst}]"), + concat!("str", $asm_suffix, " {val}, [{dst}]"), $acquire, // acquire fence - dst = inout(reg) dst => _, + dst = in(reg) dst, val = in(reg) val, - tmp = lateout(reg) _, - kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _, + kuser_memory_barrier = in(reg) KUSER_MEMORY_BARRIER, out("lr") _, options(nostack, preserves_flags), ) @@ -137,13 +130,10 @@ macro_rules! atomic_load_store { match order { Ordering::Relaxed => { asm!( - // load from val to tmp - concat!("ldr", $asm_suffix, " {tmp}, [{val}]"), - // (atomic) store tmp to dst - concat!("str", $asm_suffix, " {tmp}, [{dst}]"), - dst = inout(reg) dst => _, + // (atomic) store val to dst + concat!("str", $asm_suffix, " {val}, [{dst}]"), + dst = in(reg) dst, val = in(reg) val, - tmp = lateout(reg) _, options(nostack, preserves_flags), ); } @@ -164,31 +154,26 @@ macro_rules! 
atomic { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); debug_assert!(kuser_helper_version() >= 2); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { asm!( - "ldr r1, [r1]", // new_val "2:", "ldr r0, [r2]", // old_val - "mov {out_tmp}, r0", + "mov {out}, r0", blx!("{kuser_cmpxchg}"), "cmp r0, #0", "bne 2b", - "str {out_tmp}, [{out}]", - out = in(reg) out, - out_tmp = out(reg) _, + out = out(reg) out, kuser_cmpxchg = in(reg) KUSER_CMPXCHG, out("r0") _, - inout("r1") val => _, + in("r1") val, // new_val in("r2") dst, // ptr out("r3") _, out("ip") _, @@ -197,33 +182,29 @@ macro_rules! atomic { options(nostack), ); } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); debug_assert!(kuser_helper_version() >= 2); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { let mut r: i32; asm!( - "ldr {old}, [{old}]", - "ldr {new}, [{new}]", "2:", "ldr r0, [r2]", // old_val - "mov {out_tmp}, r0", + "mov {out}, r0", "cmp r0, {old}", "bne 3f", "mov r1, {new}", // new_val @@ -239,11 +220,9 @@ macro_rules! atomic { "bne 2b", "mov r0, #1", "4:", - "str {out_tmp}, [{out}]", - old = inout(reg) old => _, - new = inout(reg) new => _, - out = in(reg) out, - out_tmp = out(reg) _, + old = in(reg) old, + new = in(reg) new, + out = out(reg) out, kuser_cmpxchg = in(reg) KUSER_CMPXCHG, out("r0") r, out("r1") _, @@ -256,7 +235,7 @@ macro_rules! atomic { ); debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (out, r == 0) } } } @@ -270,40 +249,35 @@ macro_rules! atomic_sub_word { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); debug_assert!(kuser_helper_version() >= 2); let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. 
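// Illustrative sketch of the sub-word scheme driven by the shift/mask sequence below.
// partword.rs is not shown in this patch, so the exact shape of `create_mask_values` is an
// assumption inferred from how its results (`aligned_ptr`, `shift`, `mask`) are consumed:
// a u8/u16 value is accessed through the aligned 32-bit word that contains it.
fn create_mask_values_sketch<T>(ptr: *mut T) -> (*mut u32, u32, u32) {
    let addr = ptr as usize;
    // 4-byte-aligned word that contains the sub-word value
    let aligned_ptr = (addr & !3) as *mut u32;
    // bit position of the value inside that word (little-endian layout assumed here;
    // a real helper would also have to handle big-endian targets)
    let shift = ((addr & 3) * 8) as u32;
    // unshifted mask covering the value (0xFF for u8, 0xFFFF for u16); the asm shifts it
    let mask = ((1u64 << (8 * core::mem::size_of::<T>())) - 1) as u32;
    (aligned_ptr, shift, mask)
}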
unsafe { asm!( - concat!("ldr", $asm_suffix, " {val}, [{val}]"), "lsl {mask}, {mask}, {shift}", "lsl {val}, {val}, {shift}", "and {val}, {val}, {mask}", "mvn {inv_mask}, {mask}", "2:", "ldr r0, [r2]", // old_val - "mov {out_tmp}, r0", + "mov {out}, r0", "and r1, r0, {inv_mask}", "orr r1, r1, {val}", // new_val blx!("{kuser_cmpxchg}"), "cmp r0, #0", "bne 2b", - "lsr {out_tmp}, {out_tmp}, {shift}", - concat!("str", $asm_suffix, " {out_tmp}, [{out}]"), - val = inout(reg) val => _, - out = in(reg) out, + "lsr {out}, {out}, {shift}", + val = inout(reg) crate::utils::zero_extend(val) => _, + out = out(reg) out, shift = in(reg) shift, mask = inout(reg) mask => _, inv_mask = out(reg) _, - out_tmp = out(reg) _, kuser_cmpxchg = in(reg) KUSER_CMPXCHG, out("r0") _, out("r1") _, @@ -315,31 +289,27 @@ macro_rules! atomic_sub_word { options(nostack), ); } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); debug_assert!(kuser_helper_version() >= 2); let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { let mut r: i32; asm!( - concat!("ldr", $asm_suffix, " {old}, [{old}]"), - concat!("ldr", $asm_suffix, " {new}, [{new}]"), "lsl {mask}, {mask}, {shift}", "lsl {old}, {old}, {shift}", "lsl {new}, {new}, {shift}", @@ -349,8 +319,8 @@ macro_rules! atomic_sub_word { // "mvn {inv_mask}, {mask}", "2:", "ldr r0, [r2]", // old_val - "and {out_tmp}, r0, {mask}", - "cmp {out_tmp}, {old}", + "and {out}, r0, {mask}", + "cmp {out}, {old}", "bne 3f", "mvn r1, {mask}", "and r1, r0, r1", @@ -367,14 +337,12 @@ macro_rules! atomic_sub_word { "bne 2b", "mov r0, #1", "4:", - "lsr {out_tmp}, {out_tmp}, {shift}", - concat!("str", $asm_suffix, " {out_tmp}, [{out}]"), - old = inout(reg) old => _, - new = inout(reg) new => _, - out = in(reg) out, + "lsr {out}, {out}, {shift}", + old = inout(reg) crate::utils::zero_extend(old) => _, + new = inout(reg) crate::utils::zero_extend(new) => _, + out = out(reg) out, shift = in(reg) shift, mask = inout(reg) mask => _, - out_tmp = out(reg) _, kuser_cmpxchg = in(reg) KUSER_CMPXCHG, out("r0") r, out("r1") _, @@ -387,7 +355,7 @@ macro_rules! atomic_sub_word { ); debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (out, r == 0) } } } @@ -409,16 +377,15 @@ macro_rules! atomic64 { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); assert_has_kuser_cmpxchg64(); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); // SAFETY: the caller must uphold the safety contract. unsafe { - let mut out_tmp = MaybeUninit::::uninit(); asm!( "2:", "ldr r0, [r2]", @@ -431,27 +398,27 @@ macro_rules! 
atomic64 { "bne 2b", kuser_cmpxchg64 = in(reg) KUSER_CMPXCHG64, out("r0") _, - in("r1") out_tmp.as_mut_ptr(), // new_val + in("r1") out_ptr, // new_val in("r2") src, // ptr out("r3") _, out("lr") _, // Do not use `preserves_flags` because CMP and __kuser_cmpxchg64 modify the condition flags. options(nostack), ); - out.write(out_tmp); } + out } } impl AtomicStore for $int_type { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, _order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); assert_has_kuser_cmpxchg64(); + let val = val.as_ptr(); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -483,18 +450,17 @@ macro_rules! atomic64 { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); assert_has_kuser_cmpxchg64(); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); // SAFETY: the caller must uphold the safety contract. unsafe { - let mut out_tmp = MaybeUninit::::uninit(); asm!( "2:", "ldr r0, [r2]", @@ -505,7 +471,7 @@ macro_rules! atomic64 { blx!("{kuser_cmpxchg64}"), "cmp r0, #0", "bne 2b", - out_tmp = in(reg) out_tmp.as_mut_ptr(), + out_tmp = in(reg) out_ptr, kuser_cmpxchg64 = in(reg) KUSER_CMPXCHG64, out("r0") _, in("r1") val, // new_val @@ -515,33 +481,30 @@ macro_rules! atomic64 { // Do not use `preserves_flags` because CMP and __kuser_cmpxchg64 modify the condition flags. options(nostack), ); - out.write(out_tmp); } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); assert_has_kuser_cmpxchg64(); + let old = MaybeUninit64 { $int_type: old }; + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let new = new.as_ptr(); // SAFETY: the caller must uphold the safety contract. unsafe { let mut r: i32; - let mut out_tmp = MaybeUninit::::uninit(); asm!( - "ldr {old_lo}, [{old_hi}]", - "ldr {old_hi}, [{old_hi}, #4]", "2:", "ldr r0, [r2]", "ldr r3, [r2, #4]", @@ -567,9 +530,9 @@ macro_rules! atomic64 { "mov r0, #1", "4:", new = in(reg) new, - out_tmp = in(reg) out_tmp.as_mut_ptr(), - old_lo = out(reg) _, - old_hi = inout(reg) old => _, + out_tmp = in(reg) out_ptr, + old_lo = in(reg) old.pair.lo, + old_hi = in(reg) old.pair.hi, kuser_cmpxchg64 = in(reg) KUSER_CMPXCHG64, out("r0") r, out("r1") _, @@ -579,10 +542,9 @@ macro_rules! atomic64 { // Do not use `preserves_flags` because CMP, ORRS, and __kuser_cmpxchg64 modify the condition flags. 
options(nostack), ); - out.write(out_tmp); debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (out, r == 0) } } } diff --git a/src/arch/armv8.rs b/src/arch/armv8.rs index a67f292a..80ecfba8 100644 --- a/src/arch/armv8.rs +++ b/src/arch/armv8.rs @@ -13,9 +13,9 @@ // https://developer.arm.com/documentation/ddi0553/latest // // Generated asm: -// - armv8-a https://godbolt.org/z/Mx8z81463 -// - armv8-m baseline https://godbolt.org/z/P51ezojjW -// - armv8-m mainline https://godbolt.org/z/WdajnbYTr +// - armv8-a https://godbolt.org/z/caGGPfvdT +// - armv8-m baseline https://godbolt.org/z/dMddroqj7 +// - armv8-m mainline https://godbolt.org/z/xYn1bs1rh use core::{ arch::asm, @@ -24,6 +24,8 @@ use core::{ }; use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; +#[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] +use crate::utils::{MaybeUninit64, Pair}; macro_rules! atomic_rmw { ($op:ident, $order:ident) => { @@ -59,24 +61,20 @@ macro_rules! atomic { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_load { ($acquire:tt) => { asm!( - // (atomic) load from src to tmp - concat!("ld", $acquire, $asm_suffix, " {tmp}, [{src}]"), - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), + // (atomic) load from src to out + concat!("ld", $acquire, $asm_suffix, " {out}, [{src}]"), src = in(reg) src, - out = inout(reg) out => _, - tmp = lateout(reg) _, + out = lateout(reg) out, options(nostack, preserves_flags), ) }; @@ -88,30 +86,27 @@ macro_rules! atomic { _ => unreachable!("{:?}", order), } } + out } } impl AtomicStore for $int_type { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_store { ($release:tt) => { asm!( - // load from val to tmp - concat!("ldr", $asm_suffix, " {tmp}, [{val}]"), - // (atomic) store tmp to dst - concat!("st", $release, $asm_suffix, " {tmp}, [{dst}]"), - dst = inout(reg) dst => _, + // (atomic) store val to dst + concat!("st", $release, $asm_suffix, " {val}, [{dst}]"), + dst = in(reg) dst, val = in(reg) val, - tmp = lateout(reg) _, options(nostack, preserves_flags), ) }; @@ -129,37 +124,30 @@ macro_rules! atomic { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! 
swap { ($acquire:tt, $release:tt) => { asm!( - // load from val (ptr) to val (val) - concat!("ldr", $asm_suffix, " {val}, [{val}]"), // (atomic) swap (LL/SC loop) "2:", // load from dst to tmp - concat!("ld", $acquire, "ex", $asm_suffix, " {tmp}, [{dst}]"), + concat!("ld", $acquire, "ex", $asm_suffix, " {out}, [{dst}]"), // try to store val to dst concat!("st", $release, "ex", $asm_suffix, " {r}, {val}, [{dst}]"), // 0 if the store was successful, 1 if no store was performed "cmp {r}, 0x0", "bne 2b", - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), dst = in(reg) dst, val = inout(reg) val => _, - out = in(reg) out, + out = out(reg) out, r = out(reg) _, - tmp = out(reg) _, // Do not use `preserves_flags` because CMP modifies the condition flags. options(nostack), ) @@ -167,23 +155,21 @@ macro_rules! atomic { } atomic_rmw!(swap, order); } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -191,14 +177,11 @@ macro_rules! atomic { macro_rules! cmpxchg { ($acquire:tt, $release:tt) => { asm!( - // load from old/new (ptr) to old/new (val) - concat!("ldr", $asm_suffix, " {old}, [{old}]"), - concat!("ldr", $asm_suffix, " {new}, [{new}]"), // (atomic) CAS (LL/SC loop) "2:", - // load from dst to tmp - concat!("ld", $acquire, "ex", $asm_suffix, " {tmp}, [{dst}]"), - "cmp {tmp}, {old}", + // load from dst to out + concat!("ld", $acquire, "ex", $asm_suffix, " {out}, [{dst}]"), + "cmp {out}, {old}", "bne 3f", // jump if compare failed // try to store val to dst concat!("st", $release, "ex", $asm_suffix, " {r}, {new}, [{dst}]"), @@ -211,14 +194,11 @@ macro_rules! atomic { "clrex", s!("mov", "{r}, #1"), "4:", - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), dst = in(reg) dst, - old = inout(reg) old => _, + old = in(reg) crate::utils::zero_extend(old), new = inout(reg) new => _, - out = in(reg) out, + out = out(reg) out, r = out(reg) r, - tmp = out(reg) _, // Do not use `preserves_flags` because CMP and s! modify the condition flags. options(nostack), ) @@ -227,23 +207,20 @@ macro_rules! 
atomic { atomic_rmw!(cmpxchg, order); debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (out, r == 0) } } #[inline] unsafe fn atomic_compare_exchange_weak( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -251,12 +228,9 @@ macro_rules! atomic { macro_rules! cmpxchg_weak { ($acquire:tt, $release:tt) => { asm!( - // load from old/new (ptr) to old/new (val) - concat!("ldr", $asm_suffix, " {old}, [{old}]"), - concat!("ldr", $asm_suffix, " {new}, [{new}]"), - // load from dst to tmp - concat!("ld", $acquire, "ex", $asm_suffix, " {tmp}, [{dst}]"), - "cmp {tmp}, {old}", + // load from dst to out + concat!("ld", $acquire, "ex", $asm_suffix, " {out}, [{dst}]"), + "cmp {out}, {old}", "bne 3f", // try to store new to dst concat!("st", $release, "ex", $asm_suffix, " {r}, {new}, [{dst}]"), @@ -266,14 +240,11 @@ macro_rules! atomic { "clrex", s!("mov", "{r}, #1"), "4:", - // store tmp to out - concat!("str", $asm_suffix, " {tmp}, [{out}]"), dst = in(reg) dst, - old = inout(reg) old => _, + old = in(reg) crate::utils::zero_extend(old), new = inout(reg) new => _, - out = in(reg) out, + out = out(reg) out, r = out(reg) r, - tmp = out(reg) _, // Do not use `preserves_flags` because CMP and s! modify the condition flags. options(nostack), ) @@ -282,7 +253,7 @@ macro_rules! atomic { atomic_rmw!(cmpxchg_weak, order); debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (out, r == 0) } } } @@ -306,11 +277,10 @@ macro_rules! atomic64 { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let (prev_lo, prev_hi); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -320,13 +290,10 @@ macro_rules! atomic64 { // (atomic) load from src to tmp pair concat!("ld", $acquire, "exd r2, r3, [{src}]"), "clrex", - // store tmp pair to out - "strd r2, r3, [{out}]", src = in(reg) src, - out = in(reg) out, - // tmp pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, + // prev pair - must be even-numbered and not R14 + out("r2") prev_lo, + out("r3") prev_hi, options(nostack, preserves_flags), ) }; @@ -337,6 +304,7 @@ macro_rules! atomic64 { Ordering::Acquire | Ordering::SeqCst => atomic_load!("a"), _ => unreachable!("{:?}", order), } + MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -345,34 +313,30 @@ macro_rules! 
atomic64 { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit64 { $int_type: val }; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! store { ($acquire:tt, $release:tt) => { asm!( - // load from val to val pair - "ldrd r2, r3, [{val}]", // (atomic) store val pair to dst (LL/SC loop) "2:", // load from dst to tmp pair concat!("ld", $acquire, "exd r4, r5, [{dst}]"), // try to store val pair to dst - concat!("st", $release, "exd {r}, r2, r3, [{dst}]"), + concat!("st", $release, "exd r4, r2, r3, [{dst}]"), // 0 if the store was successful, 1 if no store was performed - "cmp {r}, 0x0", + "cmp r4, 0x0", "bne 2b", - dst = inout(reg) dst => _, - val = in(reg) val, - r = lateout(reg) _, + dst = in(reg) dst, // val pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, + inout("r2") val.pair.lo => _, + inout("r3") val.pair.hi => _, // tmp pair - must be even-numbered and not R14 out("r4") _, out("r5") _, @@ -390,21 +354,18 @@ macro_rules! atomic64 { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit64 { $int_type: val }; + let (mut prev_lo, mut prev_hi); // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! swap { ($acquire:tt, $release:tt) => { asm!( - // load from val to val pair - "ldrd r2, r3, [{val}]", // (atomic) swap (LL/SC loop) "2:", // load from dst to out pair @@ -414,24 +375,21 @@ macro_rules! atomic64 { // 0 if the store was successful, 1 if no store was performed "cmp {r}, 0x0", "bne 2b", - // store out pair to out - "strd r4, r5, [{out}]", - dst = inout(reg) dst => _, - val = in(reg) val, - out = inout(reg) out => _, - r = lateout(reg) _, + dst = in(reg) dst, + r = out(reg) _, // val pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, - // out pair - must be even-numbered and not R14 - out("r4") _, - out("r5") _, + inout("r2") val.pair.lo => _, + inout("r3") val.pair.hi => _, + // prev pair - must be even-numbered and not R14 + out("r4") prev_lo, + out("r5") prev_hi, // Do not use `preserves_flags` because CMP modifies the condition flags. options(nostack), ) }; } atomic_rmw!(swap, order); + MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -440,17 +398,16 @@ macro_rules! 
atomic64 { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); + let old = MaybeUninit64 { $int_type: old }; + let new = MaybeUninit64 { $int_type: new }; + let (mut prev_lo, mut prev_hi); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -458,13 +415,11 @@ macro_rules! atomic64 { macro_rules! cmpxchg { ($acquire:tt, $release:tt) => { asm!( - "ldrd r2, r3, [{old}]", - "ldrd r8, r9, [{new}]", // (atomic) CAS (LL/SC loop) "2:", concat!("ld", $acquire, "exd r4, r5, [{dst}]"), - "eor {tmp}, r5, r3", - "eor {r}, r4, r2", + "eor {tmp}, r5, {old_hi}", + "eor {r}, r4, {old_lo}", "orrs {r}, {r}, {tmp}", "bne 3f", // jump if compare failed concat!("st", $release, "exd {r}, r8, r9, [{dst}]"), @@ -477,23 +432,17 @@ macro_rules! atomic64 { "clrex", s!("mov", "{r}, #1"), "4:", - // store out pair to out - "strd r4, r5, [{out}]", - dst = inout(reg) dst => _, - r = lateout(reg) r, - old = in(reg) old, - new = in(reg) new, - out = inout(reg) out => _, + dst = in(reg) dst, + old_lo = in(reg) old.pair.lo, + old_hi = in(reg) old.pair.hi, + r = out(reg) r, tmp = out(reg) _, - // old pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, - // out pair - must be even-numbered and not R14 - out("r4") _, - out("r5") _, + // prev pair - must be even-numbered and not R14 + out("r4") prev_lo, + out("r5") prev_hi, // new pair - must be even-numbered and not R14 - out("r8") _, - out("r9") _, + inout("r8") new.pair.lo => _, + inout("r9") new.pair.hi => _, // Do not use `preserves_flags` because CMP, ORRS, and s! modify the condition flags. options(nostack), ) @@ -502,23 +451,22 @@ macro_rules! atomic64 { atomic_rmw!(cmpxchg, order); debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type, r == 0) } } #[inline] unsafe fn atomic_compare_exchange_weak( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); + let old = MaybeUninit64 { $int_type: old }; + let new = MaybeUninit64 { $int_type: new }; + let (mut prev_lo, mut prev_hi); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -526,11 +474,9 @@ macro_rules! atomic64 { macro_rules! 
cmpxchg_weak { ($acquire:tt, $release:tt) => { asm!( - "ldrd r2, r3, [{old}]", - "ldrd r8, r9, [{new}]", concat!("ld", $acquire, "exd r4, r5, [{dst}]"), - "eor {tmp}, r5, r3", - "eor {r}, r4, r2", + "eor {tmp}, r5, {old_hi}", + "eor {r}, r4, {old_lo}", "orrs {r}, {r}, {tmp}", "bne 3f", // jump if compare failed concat!("st", $release, "exd {r}, r8, r9, [{dst}]"), @@ -540,23 +486,17 @@ macro_rules! atomic64 { "clrex", s!("mov", "{r}, #1"), "4:", - // store out pair to out - "strd r4, r5, [{out}]", - dst = inout(reg) dst => _, - r = lateout(reg) r, - old = in(reg) old, - new = in(reg) new, - out = inout(reg) out => _, + dst = in(reg) dst, + old_lo = in(reg) old.pair.lo, + old_hi = in(reg) old.pair.hi, + r = out(reg) r, tmp = out(reg) _, - // old pair - must be even-numbered and not R14 - out("r2") _, - out("r3") _, - // out pair - must be even-numbered and not R14 - out("r4") _, - out("r5") _, + // prev pair - must be even-numbered and not R14 + out("r4") prev_lo, + out("r5") prev_hi, // new pair - must be even-numbered and not R14 - out("r8") _, - out("r9") _, + inout("r8") new.pair.lo => _, + inout("r9") new.pair.hi => _, // Do not use `preserves_flags` because ORRS and s! modify the condition flags. options(nostack), ) @@ -565,7 +505,7 @@ macro_rules! atomic64 { atomic_rmw!(cmpxchg_weak, order); debug_assert!(r == 0 || r == 1, "r={}", r); // 0 if the store was successful, 1 if no store was performed - r == 0 + (MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type, r == 0) } } } diff --git a/src/arch/avr.rs b/src/arch/avr.rs index 17464709..471105c4 100644 --- a/src/arch/avr.rs +++ b/src/arch/avr.rs @@ -45,15 +45,14 @@ macro_rules! atomic { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { // SAFETY: the caller must uphold the safety contract. unsafe { let s = disable(); - let v = src.read(); + let out = src.read(); restore(s); - out.write(v); + out } } } @@ -61,14 +60,13 @@ macro_rules! atomic { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, _order: Ordering, ) { // SAFETY: the caller must uphold the safety contract. unsafe { - let v = val.read(); let s = disable(); - dst.write(v); + dst.write(val); restore(s); } } diff --git a/src/arch/hexagon.rs b/src/arch/hexagon.rs index f2cdc599..e0807a2c 100644 --- a/src/arch/hexagon.rs +++ b/src/arch/hexagon.rs @@ -11,7 +11,10 @@ mod partword; use core::{arch::asm, mem::MaybeUninit, sync::atomic::Ordering}; -use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; +use crate::{ + raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}, + utils::{MaybeUninit64, Pair}, +}; type XSize = usize; @@ -21,41 +24,37 @@ macro_rules! atomic_load_store { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { + let out: MaybeUninit; + // SAFETY: the caller must uphold the safety contract. 
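// Illustrative sketch of the updated raw trait surface these impls target. src/raw.rs is
// not part of this patch, so the definitions below are assumptions reconstructed from the
// impl signatures above: operands now travel by value as MaybeUninit<Self> instead of
// through raw out-pointers, and RMW operations return the previous value (plus a success
// flag for compare-exchange).
use core::{mem::MaybeUninit, sync::atomic::Ordering};

pub trait AtomicLoad: Sized {
    unsafe fn atomic_load(src: *const MaybeUninit<Self>, order: Ordering) -> MaybeUninit<Self>;
}
pub trait AtomicStore: Sized {
    unsafe fn atomic_store(dst: *mut MaybeUninit<Self>, val: MaybeUninit<Self>, order: Ordering);
}
pub trait AtomicSwap: Sized {
    unsafe fn atomic_swap(
        dst: *mut MaybeUninit<Self>,
        val: MaybeUninit<Self>,
        order: Ordering,
    ) -> MaybeUninit<Self>;
}
pub trait AtomicCompareExchange: Sized {
    unsafe fn atomic_compare_exchange(
        dst: *mut MaybeUninit<Self>,
        old: MaybeUninit<Self>,
        new: MaybeUninit<Self>,
        success: Ordering,
        failure: Ordering,
    ) -> (MaybeUninit<Self>, bool);
    // the weak form may fail spuriously; a plausible default simply forwards to the strong form
    unsafe fn atomic_compare_exchange_weak(
        dst: *mut MaybeUninit<Self>,
        old: MaybeUninit<Self>,
        new: MaybeUninit<Self>,
        success: Ordering,
        failure: Ordering,
    ) -> (MaybeUninit<Self>, bool) {
        // SAFETY: the caller must uphold the safety contract.
        unsafe { Self::atomic_compare_exchange(dst, old, new, success, failure) }
    }
}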
unsafe { asm!( - // (atomic) load from src to tmp - concat!("{tmp} = mem", $asm_u_suffix, $asm_suffix, "({src})"), - // store tmp to out - concat!("mem", $asm_suffix, "({out}) = {tmp}"), + // (atomic) load from src to out + concat!("{out} = mem", $asm_u_suffix, $asm_suffix, "({src})"), src = in(reg) src, - out = inout(reg) out => _, - tmp = lateout(reg) _, + out = lateout(reg) out, options(nostack, preserves_flags), ); } + out } } impl AtomicStore for $int_type { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, _order: Ordering, ) { // SAFETY: the caller must uphold the safety contract. unsafe { asm!( - // load from val to tmp - concat!("{tmp} = mem", $asm_u_suffix, $asm_suffix, "({val})"), - // (atomic) store tmp to dst - concat!("mem", $asm_suffix, "({dst}) = {tmp}"), - dst = inout(reg) dst => _, + // (atomic) store val to dst + concat!("mem", $asm_suffix, "({dst}) = {val}"), + dst = in(reg) dst, val = in(reg) val, - tmp = lateout(reg) _, options(nostack, preserves_flags), ); } @@ -71,47 +70,45 @@ macro_rules! atomic { #[inline(never)] // TODO: there is no way to mark p0 as clobbered unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { + let mut out: MaybeUninit; + // SAFETY: the caller must uphold the safety contract. unsafe { asm!( - "{val} = memw({val})", "2:", - "{tmp} = memw_locked({dst})", + "{out} = memw_locked({dst})", "memw_locked({dst},p0) = {val}", "if (!p0) jump 2b", - "memw({out}) = {tmp}", dst = in(reg) dst, - val = inout(reg) val => _, - out = in(reg) out, - tmp = out(reg) _, + val = in(reg) val, + out = out(reg) out, options(nostack), ); } + out } } impl AtomicCompareExchange for $int_type { #[inline(never)] // TODO: there is no way to mark p0 as clobbered unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { + let mut out: MaybeUninit; + // SAFETY: the caller must uphold the safety contract. unsafe { let mut r: i32 = 1; asm!( - "{old} = memw({old})", - "{new} = memw({new})", "2:", - "{tmp} = memw_locked({dst})", - "{{ p0 = cmp.eq({tmp},{old})", + "{out} = memw_locked({dst})", + "{{ p0 = cmp.eq({out},{old})", "if (!p0.new) jump:nt 3f }}", "memw_locked({dst},p0) = {new}", "if (!p0) jump 2b", @@ -119,17 +116,15 @@ macro_rules! atomic { "3:", "{r} = #0", "4:", - "memw({out}) = {tmp}", dst = in(reg) dst, - old = inout(reg) old => _, - new = inout(reg) new => _, - out = in(reg) out, - tmp = out(reg) _, + old = in(reg) old, + new = in(reg) new, + out = out(reg) out, r = inout(reg) r, options(nostack), ); debug_assert!(r == 0 || r == 1, "r={}", r); - r != 0 + (out, r != 0) } } } @@ -143,59 +138,55 @@ macro_rules! atomic_sub_word { #[inline(never)] // TODO: there is no way to mark p0 as clobbered unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. 
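// Illustrative sketch of the assumed `crate::utils::zero_extend` helper used for the
// sub-word register operands below (its real definition lives in src/utils.rs, which this
// patch does not show): the possibly-uninitialized u8/u16 is widened to register width with
// the upper bits set to zero, so the asm can freely shift and mask the full register.
use core::mem::MaybeUninit;

fn zero_extend_sketch(v: MaybeUninit<u8>) -> MaybeUninit<u32> {
    // start from all-zero bits so the unused upper bits are initialized
    let mut out = MaybeUninit::<u32>::new(0);
    // copy the (possibly uninitialized) low byte; ptr::copy_nonoverlapping performs an
    // untyped copy, so uninitialized bytes are allowed. Little-endian byte order is
    // assumed here; a real helper would account for endianness.
    unsafe {
        core::ptr::copy_nonoverlapping(v.as_ptr(), out.as_mut_ptr() as *mut u8, 1);
    }
    out
}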
unsafe { asm!( - concat!("{val} = mem", $asm_u_suffix, $asm_suffix, "({val})"), "{mask} = asl({mask},{shift})", "{val} = asl({val},{shift})", "{val} = and({val},{mask})", "{inv_mask} = not({mask})", "2:", - "{out_tmp} = memw_locked({dst})", - "{tmp} = and({out_tmp},{inv_mask})", + "{out} = memw_locked({dst})", + "{tmp} = and({out},{inv_mask})", "{tmp} = or({tmp},{val})", "memw_locked({dst},p0) = {tmp}", "if (!p0) jump 2b", - "{out_tmp} = asr({out_tmp},{shift})", - concat!("mem", $asm_suffix, "({out}) = {out_tmp}"), + "{out} = asr({out},{shift})", dst = in(reg) aligned_ptr, - val = inout(reg) val => _, - out = in(reg) out, + val = inout(reg) crate::utils::zero_extend(val) => _, + out = out(reg) out, shift = in(reg) shift, mask = inout(reg) mask => _, inv_mask = out(reg) _, - out_tmp = out(reg) _, tmp = out(reg) _, options(nostack), ); } + out } } impl AtomicCompareExchange for $int_type { #[inline(never)] // TODO: there is no way to mark p0 as clobbered unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { let mut r: i32 = 1; asm!( - concat!("{old} = mem", $asm_u_suffix, $asm_suffix, "({old})"), - concat!("{new} = mem", $asm_u_suffix, $asm_suffix, "({new})"), "{mask} = asl({mask},{shift})", "{old} = asl({old},{shift})", "{new} = asl({new},{shift})", @@ -204,8 +195,8 @@ macro_rules! atomic_sub_word { "{inv_mask} = not({mask})", "2:", "{tmp} = memw_locked({dst})", - "{out_tmp} = and({tmp},{mask})", - "{{ p0 = cmp.eq({out_tmp},{old})", + "{out} = and({tmp},{mask})", + "{{ p0 = cmp.eq({out},{old})", "if (!p0.new) jump:nt 3f }}", "{tmp} = and({tmp},{inv_mask})", "{tmp} = or({tmp},{new})", @@ -215,22 +206,20 @@ macro_rules! atomic_sub_word { "3:", "{r} = #0", "4:", - "{out_tmp} = asr({out_tmp},{shift})", - concat!("mem", $asm_suffix, "({out}) = {out_tmp}"), + "{out} = asr({out},{shift})", dst = in(reg) aligned_ptr, - old = inout(reg) old => _, - new = inout(reg) new => _, - out = in(reg) out, + old = inout(reg) crate::utils::zero_extend(old) => _, + new = inout(reg) crate::utils::zero_extend(new) => _, + out = out(reg) out, shift = in(reg) shift, mask = inout(reg) mask => _, inv_mask = out(reg) _, - out_tmp = out(reg) _, tmp = out(reg) _, r = inout(reg) r, options(nostack), ); debug_assert!(r == 0 || r == 1, "r={}", r); - r != 0 + (out, r != 0) } } } @@ -252,22 +241,21 @@ macro_rules! atomic64 { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { + let (prev_lo, prev_hi); + // SAFETY: the caller must uphold the safety contract. unsafe { asm!( - // (atomic) load from src to tmp pair + // (atomic) load from src to prev pair "{{ r3:2 = memd({src}) }}", - // store tmp pair to out - "memd({out}) = r3:2", src = in(reg) src, - out = in(reg) out, - out("r2") _, // tmp - out("r3") _, // tmp + out("r2") prev_lo, + out("r3") prev_hi, options(nostack, preserves_flags), ); + MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -275,20 +263,19 @@ macro_rules! 
atomic64 { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, _order: Ordering, ) { + let val = MaybeUninit64 { $int_type: val }; + // SAFETY: the caller must uphold the safety contract. unsafe { asm!( - // load from val to tmp pair - "{{ r3:2 = memd({val}) }}", - // (atomic) store tmp pair to dst + // (atomic) store val pair to dst "memd({dst}) = r3:2", dst = in(reg) dst, - val = in(reg) val, - out("r2") _, // tmp - out("r3") _, // tmp + in("r2") val.pair.lo, + in("r3") val.pair.hi, options(nostack, preserves_flags), ); } @@ -298,28 +285,27 @@ macro_rules! atomic64 { #[inline(never)] // TODO: there is no way to mark p0 as clobbered unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { + let val = MaybeUninit64 { $int_type: val }; + let (mut prev_lo, mut prev_hi); + // SAFETY: the caller must uphold the safety contract. unsafe { asm!( - "{{ r3:2 = memd({val}) }}", "2:", "{{ r5:4 = memd_locked({dst}) }}", "memd_locked({dst},p0) = r3:2", "if (!p0) jump 2b", - "memd({out}) = r5:4", dst = in(reg) dst, - val = in(reg) val, - out = in(reg) out, - out("r2") _, // val - out("r3") _, // val - out("r4") _, // tmp - out("r5") _, // tmp + in("r2") val.pair.lo, + in("r3") val.pair.hi, + out("r4") prev_lo, + out("r5") prev_hi, options(nostack), ); + MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -327,18 +313,19 @@ macro_rules! atomic64 { #[inline(never)] // TODO: there is no way to mark p0 as clobbered unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { + let old = MaybeUninit64 { $int_type: old }; + let new = MaybeUninit64 { $int_type: new }; + let (mut prev_lo, mut prev_hi); + // SAFETY: the caller must uphold the safety contract. unsafe { let mut r: i32 = 1; asm!( - "{{ r3:2 = memd({old}) }}", - "{{ r5:4 = memd({new}) }}", "2:", "{{ r7:6 = memd_locked({dst}) }}", // TODO: merge two cmp? @@ -352,22 +339,18 @@ macro_rules! atomic64 { "3:", "{r} = #0", "4:", - "memd({out}) = r7:6", dst = in(reg) dst, - old = in(reg) old, - new = in(reg) new, - out = in(reg) out, - out("r2") _, // old - out("r3") _, // old - out("r4") _, // new - out("r5") _, // new - out("r6") _, // tmp - out("r7") _, // tmp r = inout(reg) r, + in("r2") old.pair.lo, + in("r3") old.pair.hi, + in("r4") new.pair.lo, + in("r5") new.pair.hi, + out("r6") prev_lo, + out("r7") prev_hi, options(nostack), ); debug_assert!(r == 0 || r == 1, "r={}", r); - r != 0 + (MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type, r != 0) } } } diff --git a/src/arch/loongarch.rs b/src/arch/loongarch.rs index f1c871eb..174ec942 100644 --- a/src/arch/loongarch.rs +++ b/src/arch/loongarch.rs @@ -3,7 +3,7 @@ // LoongArch // // Generated asm: -// - loongarch64 https://godbolt.org/z/vTxfajT14 +// - loongarch64 https://godbolt.org/z/4c7sTaE81 #[path = "partword.rs"] mod partword; @@ -26,25 +26,21 @@ macro_rules! atomic_load { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. 
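// Illustrative sketch of the assumed `crate::utils::{MaybeUninit64, Pair}` helpers used by
// the 64-bit impls above (the real definitions are in src/utils.rs, not shown in this
// patch): a union view that lets one 64-bit MaybeUninit value be split into two
// register-sized halves, so each half can be bound to its own asm! operand
// (e.g. in("r2") val.pair.lo, in("r3") val.pair.hi) and the previous value can be
// reassembled from a prev_lo/prev_hi pair afterwards.
use core::mem::MaybeUninit;

#[derive(Clone, Copy)]
#[repr(C)]
struct Pair {
    // field order as laid out on a little-endian target; the real helper presumably
    // swaps lo/hi on big-endian targets
    lo: MaybeUninit<u32>,
    hi: MaybeUninit<u32>,
}

#[derive(Clone, Copy)]
#[repr(C)]
union MaybeUninit64 {
    u64: MaybeUninit<u64>,
    i64: MaybeUninit<i64>,
    pair: Pair,
}

// usage mirroring the pattern in the impls above
fn split_and_join(v: MaybeUninit<u64>) -> MaybeUninit<u64> {
    let v = MaybeUninit64 { u64: v };
    // SAFETY: every bit pattern (including uninitialized bits) is valid for both views.
    let (lo, hi) = unsafe { (v.pair.lo, v.pair.hi) };
    unsafe { MaybeUninit64 { pair: Pair { lo, hi } }.u64 }
}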
unsafe { macro_rules! atomic_load { ($acquire:tt) => { asm!( - // (atomic) load from src to tmp - concat!("ld.", $asm_suffix, " {tmp}, {src}, 0"), + // (atomic) load from src to out + concat!("ld.", $asm_suffix, " {out}, {src}, 0"), $acquire, - // store tmp to out - concat!("st.", $asm_suffix, " {tmp}, {out}, 0"), src = in(reg) ptr_reg!(src), - out = inout(reg) ptr_reg!(out) => _, - tmp = lateout(reg) _, + out = lateout(reg) out, options(nostack, preserves_flags), ) }; @@ -56,6 +52,7 @@ macro_rules! atomic_load { _ => unreachable!("{:?}", order), } } + out } } }; @@ -68,36 +65,29 @@ macro_rules! atomic { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); // SAFETY: the caller must uphold the safety contract. unsafe { match order { Ordering::Relaxed => { asm!( - // load from val to tmp - concat!("ld.", $asm_suffix, " {tmp}, {val}, 0"), - // (atomic) store tmp to dst - concat!("st.", $asm_suffix, " {tmp}, {dst}, 0"), - dst = inout(reg) ptr_reg!(dst) => _, - val = in(reg) ptr_reg!(val), - tmp = lateout(reg) _, + // (atomic) store val to dst + concat!("st.", $asm_suffix, " {val}, {dst}, 0"), + dst = in(reg) ptr_reg!(dst), + val = in(reg) val, options(nostack, preserves_flags), ); } Ordering::Release | Ordering::SeqCst => { asm!( - // load from val to tmp - concat!("ld.", $asm_suffix, " {tmp}, {val}, 0"), - // (atomic) store tmp to dst - concat!("amswap_db.", $asm_suffix, " $zero, {tmp}, {dst}"), - dst = inout(reg) ptr_reg!(dst) => _, - val = in(reg) ptr_reg!(val), - tmp = lateout(reg) _, + // (atomic) store val to dst + concat!("amswap_db.", $asm_suffix, " $zero, {val}, {dst}"), + dst = in(reg) ptr_reg!(dst), + val = in(reg) val, options(nostack, preserves_flags), ) } @@ -110,60 +100,48 @@ macro_rules! atomic { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. 
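// The success flag computed at the end of the LL/SC compare-exchange loop below
// ("xor {r}, {out}, {old}" followed by "sltui {r}, {r}, 1") is simply an equality test on
// the loaded value; the equivalent in Rust terms:
fn cmpxchg_success_flag(out: u32, old: u32) -> u32 {
    // xor is zero iff the values are equal; "set if less than unsigned 1" turns "== 0" into 1
    ((out ^ old) < 1) as u32
}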
unsafe { asm!( - // load from val (ptr) to val (val) - concat!("ld.", $asm_suffix, " {val}, {val}, 0"), // (atomic) swap (AMO) - // - load value from dst and store it to tmp + // - load value from dst and store it to out // - store value of val to dst - concat!("amswap_db.", $asm_suffix, " {tmp}, {val}, {dst}"), - // store tmp to out - concat!("st.", $asm_suffix, " {tmp}, {out}, 0"), + concat!("amswap_db.", $asm_suffix, " {out}, {val}, {dst}"), dst = in(reg) ptr_reg!(dst), - val = inout(reg) ptr_reg!(val) => _, - out = inout(reg) ptr_reg!(out) => _, - tmp = out(reg) _, + val = in(reg) val, + out = out(reg) out, options(nostack, preserves_flags), ) } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { let mut r: XSize; asm!( - // load from old/new (ptr) to old/new (val) - concat!("ld.", $asm_suffix, " {old}, {old}, 0"), - concat!("ld.", $asm_suffix, " {new}, {new}, 0"), // (atomic) CAS (LL/SC loop) "2:", - concat!("ll.", $asm_suffix, " {tmp}, {dst}, 0"), - "bne {tmp}, {old}, 3f", // compare and jump if compare failed + concat!("ll.", $asm_suffix, " {out}, {dst}, 0"), + "bne {out}, {old}, 3f", // compare and jump if compare failed "dbar 0", "move {r}, {new}", concat!("sc.", $asm_suffix, " {r}, {dst}, 0"), @@ -172,20 +150,17 @@ macro_rules! atomic { "3:", "dbar 1792", "4:", - // store tmp to out - concat!("st.", $asm_suffix, " {tmp}, {out}, 0"), - "xor {r}, {tmp}, {old}", + "xor {r}, {out}, {old}", "sltui {r}, {r}, 1", dst = in(reg) ptr_reg!(dst), - old = inout(reg) ptr_reg!(old) => _, - new = inout(reg) ptr_reg!(new) => _, - out = in(reg) ptr_reg!(out), - tmp = out(reg) _, + old = in(reg) old, + new = in(reg) new, + out = out(reg) out, r = out(reg) r, options(nostack, preserves_flags), ); debug_assert!(r == 0 || r == 1, "r={}", r); - r != 0 + (out, r != 0) } } } @@ -199,26 +174,22 @@ macro_rules! atomic_sub_word { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_store { ($acquire:tt, $release:tt) => { asm!( - // load from val to tmp - concat!("ld.", $asm_suffix, " {tmp}, {val}, 0"), - // (atomic) store tmp to dst + // (atomic) store val to dst $release, - concat!("st.", $asm_suffix, " {tmp}, {dst}, 0"), + concat!("st.", $asm_suffix, " {val}, {dst}, 0"), $acquire, - dst = inout(reg) ptr_reg!(dst) => _, - val = in(reg) ptr_reg!(val), - tmp = lateout(reg) _, + dst = in(reg) ptr_reg!(dst), + val = in(reg) val, options(nostack, preserves_flags), ) }; @@ -236,14 +207,12 @@ macro_rules! 
atomic_sub_word { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -255,28 +224,25 @@ macro_rules! atomic_sub_word { asm!( "sll.w {mask}, {mask}, {shift}", "addi.w {mask}, {mask}, 0", - concat!("ld.", $asm_suffix, "u {val}, {val}, 0"), "sll.w {val}, {val}, {shift}", "addi.w {val}, {val}, 0", // (atomic) swap (LL/SC loop) "2:", $fence, - "ll.w {tmp1}, {dst}, 0", - "addi.w {tmp2}, {val}, 0", - "xor {tmp2}, {tmp1}, {tmp2}", - "and {tmp2}, {tmp2}, {mask}", - "xor {tmp2}, {tmp1}, {tmp2}", - "sc.w {tmp2}, {dst}, 0", - "beqz {tmp2}, 2b", - "srl.w {tmp1}, {tmp1}, {shift}", - concat!("st.", $asm_suffix, " {tmp1}, {out}, 0"), + "ll.w {out}, {dst}, 0", + "addi.w {tmp}, {val}, 0", + "xor {tmp}, {out}, {tmp}", + "and {tmp}, {tmp}, {mask}", + "xor {tmp}, {out}, {tmp}", + "sc.w {tmp}, {dst}, 0", + "beqz {tmp}, 2b", + "srl.w {out}, {out}, {shift}", dst = in(reg) ptr_reg!(aligned_ptr), - val = inout(reg) ptr_reg!(val) => _, - out = in(reg) ptr_reg!(out), + val = inout(reg) crate::utils::zero_extend(val) => _, + out = out(reg) out, shift = in(reg) shift, mask = inout(reg) mask => _, - tmp1 = out(reg) _, - tmp2 = out(reg) _, + tmp = out(reg) _, options(nostack, preserves_flags), ) }; @@ -286,23 +252,21 @@ macro_rules! atomic_sub_word { _ => atomic_swap!("dbar 0"), } } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -311,8 +275,6 @@ macro_rules! atomic_sub_word { // Based on assemblies generated by rustc/LLVM. // See also partword.rs. asm!( - concat!("ld.", $asm_suffix, "u {new}, {new}, 0"), - concat!("ld.", $asm_suffix, "u {old}, {old}, 0"), "sll.w {new}, {new}, {shift}", "addi.w {new}, {new}, 0", "sll.w {old}, {old}, {shift}", @@ -334,26 +296,24 @@ macro_rules! 
atomic_sub_word { "dbar 1792", "4:", "srl.w $a6, $t0, {shift}", - concat!("st.", $asm_suffix, " $a6, {out}, 0"), "and {r}, $t0, {mask}", "addi.w {r}, {r}, 0", "xor {r}, {old}, {r}", "sltui {r}, {r}, 1", dst = in(reg) ptr_reg!(aligned_ptr), - old = inout(reg) ptr_reg!(old) => _, - new = inout(reg) ptr_reg!(new) => _, - out = inout(reg) ptr_reg!(out) => _, + old = inout(reg) crate::utils::zero_extend(old) => _, + new = inout(reg) crate::utils::zero_extend(new) => _, shift = in(reg) shift, mask = inout(reg) mask => _, r = lateout(reg) r, - out("$a6") _, + out("$a6") out, out("$a7") _, out("$t0") _, out("$t1") _, options(nostack, preserves_flags), ); debug_assert!(r == 0 || r == 1, "r={}", r); - r != 0 + (out, r != 0) } } } diff --git a/src/arch/mips.rs b/src/arch/mips.rs index 79ae19f7..e39187aa 100644 --- a/src/arch/mips.rs +++ b/src/arch/mips.rs @@ -3,10 +3,10 @@ // MIPS32 and MIPS64 // // Generated asm: -// - mips https://godbolt.org/z/38oKcY5bj -// - mipsel https://godbolt.org/z/M18x694zh -// - mips64 https://godbolt.org/z/GMMda9rM8 -// - mips64el https://godbolt.org/z/31ovT3vzW +// - mips https://godbolt.org/z/hbK74Y17c +// - mipsel https://godbolt.org/z/aoazd4rjY +// - mips64 https://godbolt.org/z/18ees8xqx +// - mips64el https://godbolt.org/z/rrcP1o19q #[path = "partword.rs"] mod partword; @@ -46,11 +46,10 @@ macro_rules! atomic_load_store { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -59,15 +58,12 @@ macro_rules! atomic_load_store { asm!( ".set push", ".set noat", - // (atomic) load from src to tmp - concat!("l", $asm_suffix, " {tmp}, 0({src})"), + // (atomic) load from src to out + concat!("l", $asm_suffix, " {out}, 0({src})"), $acquire, - // store tmp to out - concat!("s", $asm_suffix, " {tmp}, 0({out})"), ".set pop", src = in(reg) ptr_reg!(src), - out = in(reg) ptr_reg!(out), - tmp = out(reg) _, + out = out(reg) out, options(nostack), ) }; @@ -79,17 +75,17 @@ macro_rules! atomic_load_store { _ => unreachable!("{:?}", order), } } + out } } impl AtomicStore for $int_type { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -98,16 +94,13 @@ macro_rules! atomic_load_store { asm!( ".set push", ".set noat", - // load from val to tmp - concat!("l", $asm_suffix, $l_u_suffix, " {tmp}, 0({val})"), - // (atomic) store tmp to dst + // (atomic) store val to dst $release, // release fence - concat!("s", $asm_suffix, " {tmp}, 0({dst})"), + concat!("s", $asm_suffix, " {val}, 0({dst})"), $acquire, // acquire fence ".set pop", dst = in(reg) ptr_reg!(dst), - val = in(reg) ptr_reg!(val), - tmp = out(reg) _, + val = in(reg) val, options(nostack), ) }; @@ -127,13 +120,11 @@ macro_rules! 
atomic { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -142,51 +133,43 @@ macro_rules! atomic { asm!( ".set push", ".set noat", - // load from val to val_tmp - concat!("l", $asm_suffix, " {val_tmp}, 0({val})"), // (atomic) swap (LL/SC loop) $release, // release fence "2:", - // load from dst to out_tmp - concat!("ll", $ll_sc_suffix, " {out_tmp}, 0({dst})"), - "move {r}, {val_tmp}", + // load from dst to out + concat!("ll", $ll_sc_suffix, " {out}, 0({dst})"), + "move {r}, {val}", // try to store val to dst concat!("sc", $ll_sc_suffix, " {r}, 0({dst})"), // 1 if the store was successful, 0 if no store was performed "beqz {r}, 2b", $acquire, // acquire fence - // store out_tmp to out - concat!("s", $asm_suffix, " {out_tmp}, 0({out})"), ".set pop", - dst = inout(reg) ptr_reg!(dst) => _, - val = in(reg) ptr_reg!(val), - out = inout(reg) ptr_reg!(out) => _, - val_tmp = out(reg) _, - out_tmp = out(reg) _, - r = lateout(reg) _, + dst = in(reg) ptr_reg!(dst), + val = in(reg) val, + out = out(reg) out, + r = out(reg) _, options(nostack), ) }; } atomic_rmw!(swap, order); } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -196,42 +179,34 @@ macro_rules! 
atomic { asm!( ".set push", ".set noat", - // load from old/new to old_tmp/new_tmp - concat!("l", $asm_suffix, " {old_tmp}, 0({old})"), - concat!("l", $asm_suffix, " {new_tmp}, 0({new})"), // (atomic) CAS (LL/SC loop) $release, // release fence "2:", - // load from dst to out_tmp - concat!("ll", $ll_sc_suffix, " {out_tmp}, 0({dst})"), - "bne {out_tmp}, {old_tmp}, 3f", // compare and jump if compare failed - "move {r}, {new_tmp}", + // load from dst to out + concat!("ll", $ll_sc_suffix, " {out}, 0({dst})"), + "bne {out}, {old}, 3f", // compare and jump if compare failed + "move {r}, {new}", // try to store new to dst concat!("sc", $ll_sc_suffix, " {r}, 0({dst})"), // 1 if the store was successful, 0 if no store was performed "beqz {r}, 2b", // continue loop if store failed "3:", $acquire, // acquire fence - "xor {new_tmp}, {out_tmp}, {old_tmp}", - // store out_tmp to out - concat!("s", $asm_suffix, " {out_tmp}, 0({out})"), - "sltiu {r}, {new_tmp}, 1", + "xor {new}, {out}, {old}", + "sltiu {r}, {new}, 1", ".set pop", - dst = inout(reg) ptr_reg!(dst) => _, - old = in(reg) ptr_reg!(old), - new = in(reg) ptr_reg!(new), - out = inout(reg) ptr_reg!(out) => _, - new_tmp = out(reg) _, - old_tmp = out(reg) _, - out_tmp = out(reg) _, - r = lateout(reg) r, + dst = in(reg) ptr_reg!(dst), + old = in(reg) old, + new = inout(reg) new => _, + out = out(reg) out, + r = out(reg) r, options(nostack), ) }; } atomic_rmw!(cmpxchg, order); debug_assert!(r == 0 || r == 1, "r={}", r); - r != 0 + (out, r != 0) } } } @@ -246,14 +221,12 @@ macro_rules! atomic_sub_word { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -265,63 +238,56 @@ macro_rules! 
atomic_sub_word { asm!( ".set push", ".set noat", - concat!("l", $asm_suffix, "u {tmp}, 0($5)"), "sllv {mask}, {mask}, {shift}", - "sllv $7, {tmp}, {shift}", - "nor $5, $zero, {mask}", + "sllv {val}, {val}, {shift}", + "and {val}, {val}, {mask}", + "nor {inv_mask}, $zero, {mask}", // (atomic) swap (LL/SC loop) $release, "2:", - "ll $8, 0({dst})", - "and $9, $7, {mask}", - "and $10, $8, $5", - "or $10, $10, $9", - "sc $10, 0({dst})", - "beqz $10, 2b", - "and {tmp}, $8, {mask}", - "srlv {tmp}, {tmp}, {shift}", - concat!("se", $asm_suffix, " {tmp}, {tmp}"), + "ll {out}, 0({dst})", + "and {tmp}, {out}, {inv_mask}", + "or {tmp}, {tmp}, {val}", + "sc {tmp}, 0({dst})", + "beqz {tmp}, 2b", + "and {out}, {out}, {mask}", + "srlv {out}, {out}, {shift}", + concat!("se", $asm_suffix, " {out}, {out}"), $acquire, - concat!("s", $asm_suffix, " {tmp}, 0({out})"), ".set pop", dst = in(reg) ptr_reg!(aligned_ptr), - out = in(reg) ptr_reg!(out), + val = inout(reg) crate::utils::zero_extend(val) => _, + out = out(reg) out, shift = in(reg) shift, mask = inout(reg) mask => _, + inv_mask = out(reg) _, tmp = out(reg) _, - inout("$5") ptr_reg!(val) => _, // val => inv_mask - out("$7") _, - out("$8") _, - out("$9") _, - out("$10") _, options(nostack), ) }; } atomic_rmw!(swap, order); } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); let (aligned_ptr, shift, _mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { - let mut r: XSize; + let mut r: XSize = 1; // Implement sub-word atomic operations using word-sized LL/SC loop. // Based on assemblies generated by rustc/LLVM. // See also partword.rs. @@ -330,51 +296,47 @@ macro_rules! 
atomic_sub_word { asm!( ".set push", ".set noat", - concat!("l", $asm_suffix, "u $2, 0($6)"), // new - concat!("l", $asm_suffix, " {tmp}, 0($5)"), // old - concat!("ori $5, $zero, ", $max), - concat!("andi $8, {tmp}, ", $max), - concat!("andi $2, $2, ", $max), - "sllv $5, $5, {shift}", - "sllv $8, $8, {shift}", - "sllv $9, $2, {shift}", - "nor $6, $zero, $5", + concat!("ori {mask}, $zero, ", $max), + concat!("andi {old}, {old}, ", $max), + concat!("andi {new}, {new}, ", $max), + "sllv {mask}, {mask}, {shift}", + "sllv {old}, {old}, {shift}", + "sllv {new}, {new}, {shift}", + "nor {inv_mask}, $zero, {mask}", // (atomic) CAS (LL/SC loop) $release, "2:", - "ll $10, 0({dst})", - "and $11, $10, $5", - "bne $11, $8, 3f", - "and $10, $10, $6", - "or $10, $10, $9", - "sc $10, 0({dst})", - "beqz $10, 2b", + "ll {tmp}, 0({dst})", + "and {out}, {tmp}, {mask}", + "bne {out}, {old}, 3f", + "and {tmp}, {tmp}, {inv_mask}", + "or {tmp}, {tmp}, {new}", + "sc {tmp}, 0({dst})", + "beqz {tmp}, 2b", + "b 4f", "3:", - "srlv $2, $11, {shift}", - concat!("se", $asm_suffix, " $2, $2"), + "li {r}, 0", + "4:", + "srlv {out}, {out}, {shift}", + concat!("se", $asm_suffix, " {out}, {out}"), $acquire, - "xor {tmp}, $2, {tmp}", - concat!("s", $asm_suffix, " $2, 0({out})"), - "sltiu $2, {tmp}, 1", ".set pop", dst = in(reg) ptr_reg!(aligned_ptr), - out = in(reg) ptr_reg!(out), + old = inout(reg) old => _, + new = inout(reg) crate::utils::zero_extend(new) => _, + out = out(reg) out, shift = in(reg) shift, + mask = out(reg) _, + inv_mask = out(reg) _, tmp = out(reg) _, - out("$2") r, - inout("$5") ptr_reg!(old) => _, // old => mask - inout("$6") ptr_reg!(new) => _, // new => inv_mask - out("$8") _, - out("$9") _, - out("$10") _, - out("$11") _, + r = inout(reg) r, options(nostack), ) }; } atomic_rmw!(cmpxchg, order); debug_assert!(r == 0 || r == 1, "r={}", r); - r != 0 + (out, r != 0) } } } diff --git a/src/arch/mod.rs b/src/arch/mod.rs index af0e6829..b2bfc694 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -26,7 +26,7 @@ target_arch = "aarch64", target_arch = "riscv32", target_arch = "riscv64", - all(target_arch = "loongarch64", not(atomic_maybe_uninit_no_loongarch64_asm)), + target_arch = "loongarch64", all( any( target_arch = "avr", @@ -79,7 +79,6 @@ mod avr; #[cfg(atomic_maybe_uninit_unstable_asm_experimental_arch)] mod hexagon; #[cfg(target_arch = "loongarch64")] -#[cfg(not(atomic_maybe_uninit_no_loongarch64_asm))] mod loongarch; #[cfg(any( target_arch = "mips", diff --git a/src/arch/msp430.rs b/src/arch/msp430.rs index 7d663afc..a124738f 100644 --- a/src/arch/msp430.rs +++ b/src/arch/msp430.rs @@ -14,45 +14,39 @@ macro_rules! atomic { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { + let out: MaybeUninit; + // SAFETY: the caller must uphold the safety contract. unsafe { // atomic load is always SeqCst. asm!( - // TODO: use mem to mem mov? - // (atomic) load from src to tmp - concat!("mov", $asm_suffix, " @{src}, {tmp}"), - // store tmp to out - concat!("mov", $asm_suffix, " {tmp}, 0({out})"), + // (atomic) load from src to out + concat!("mov", $asm_suffix, " @{src}, {out}"), src = in(reg) src, - out = inout(reg) out => _, - tmp = lateout(reg) _, + out = lateout(reg) out, options(nostack, preserves_flags), ); } + out } } impl AtomicStore for $int_type { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, _order: Ordering, ) { // SAFETY: the caller must uphold the safety contract. 
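// With val passed by value it already arrives in a register, so a single
// register-to-memory mov is the whole (atomic) store; the old two-step
// sequence through {tmp} (and the mem-to-mem TODO) is no longer needed.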
unsafe { // atomic store is always SeqCst. asm!( - // TODO: use mem to mem mov? - // load from val to tmp - concat!("mov", $asm_suffix, " @{val}, {tmp}"), - // (atomic) store tmp to dst - concat!("mov", $asm_suffix, " {tmp}, 0({dst})"), - dst = inout(reg) dst => _, + // (atomic) store val to dst + concat!("mov", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg) dst, val = in(reg) val, - tmp = lateout(reg) _, options(nostack, preserves_flags), ); } diff --git a/src/arch/powerpc.rs b/src/arch/powerpc.rs index c6473a9d..30a324cc 100644 --- a/src/arch/powerpc.rs +++ b/src/arch/powerpc.rs @@ -9,11 +9,11 @@ // - portable-atomic https://github.com/taiki-e/portable-atomic // // Generated asm: -// - powerpc https://godbolt.org/z/PME7czo4P -// - powerpc64 https://godbolt.org/z/forK75PK4 -// - powerpc64 (pwr8) https://godbolt.org/z/eGf47W164 -// - powerpc64le https://godbolt.org/z/7f1b8WWd3 -// - powerpc64le (pwr7) https://godbolt.org/z/bKxv6W3Mn +// - powerpc https://godbolt.org/z/s7zvz8TWz +// - powerpc64 https://godbolt.org/z/8GTKnn9vG +// - powerpc64 (pwr8) https://godbolt.org/z/n55d5EGe9 +// - powerpc64le https://godbolt.org/z/r3xa399MP +// - powerpc64le (pwr7) https://godbolt.org/z/TYsbsWGe3 #[cfg(not(all( target_arch = "powerpc64", @@ -32,51 +32,12 @@ use core::{ }; use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; - -#[cfg(target_arch = "powerpc64")] -#[cfg(any( - target_feature = "quadword-atomics", - atomic_maybe_uninit_target_feature = "quadword-atomics", -))] -#[cfg(target_endian = "big")] -macro_rules! p128h { - () => { - "0" - }; -} -#[cfg(target_arch = "powerpc64")] -#[cfg(any( - target_feature = "quadword-atomics", - atomic_maybe_uninit_target_feature = "quadword-atomics", -))] -#[cfg(target_endian = "big")] -macro_rules! p128l { - () => { - "8" - }; -} #[cfg(target_arch = "powerpc64")] #[cfg(any( target_feature = "quadword-atomics", atomic_maybe_uninit_target_feature = "quadword-atomics", ))] -#[cfg(target_endian = "little")] -macro_rules! p128h { - () => { - "8" - }; -} -#[cfg(target_arch = "powerpc64")] -#[cfg(any( - target_feature = "quadword-atomics", - atomic_maybe_uninit_target_feature = "quadword-atomics", -))] -#[cfg(target_endian = "little")] -macro_rules! p128l { - () => { - "0" - }; -} +use crate::utils::{MaybeUninit128, Pair}; macro_rules! atomic_rmw { ($op:ident, $order:ident) => { @@ -110,26 +71,22 @@ macro_rules! atomic_load_store { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_load { ($acquire:tt, $release:tt) => { asm!( - // (atomic) load from src to tmp + // (atomic) load from src to out $release, - concat!("l", $l_suffix, " {tmp}, 0({src})"), + concat!("l", $l_suffix, " {out}, 0({src})"), $acquire, - // store tmp to out - concat!("st", $asm_suffix, " {tmp}, 0({out})"), src = in(reg_nonzero) ptr_reg!(src), - out = inout(reg_nonzero) ptr_reg!(out) => _, - tmp = lateout(reg_nonzero) _, + out = lateout(reg) out, options(nostack, preserves_flags), ) }; @@ -139,19 +96,16 @@ macro_rules! 
atomic_load_store { ($release:tt) => { asm!( $release, - // (atomic) load from src to tmp - concat!("l", $l_suffix, " {tmp}, 0({src})"), + // (atomic) load from src to out + concat!("l", $l_suffix, " {out}, 0({src})"), // Lightweight acquire sync // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62 - "cmpd %cr7, {tmp}, {tmp}", + "cmpd %cr7, {out}, {out}", "bne- %cr7, 2f", "2:", "isync", - // store tmp to out - concat!("st", $asm_suffix, " {tmp}, 0({out})"), src = in(reg_nonzero) ptr_reg!(src), - out = inout(reg_nonzero) ptr_reg!(out) => _, - tmp = lateout(reg_nonzero) _, + out = lateout(reg) out, out("cr7") _, options(nostack, preserves_flags), ) @@ -170,31 +124,28 @@ macro_rules! atomic_load_store { _ => unreachable!("{:?}", order), } } + out } } impl AtomicStore for $int_type { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_store { ($release:tt) => { asm!( - // load from val to tmp - concat!("l", $l_suffix, " {tmp}, 0({val})"), - // (atomic) store tmp to dst + // (atomic) store val to dst $release, - concat!("st", $asm_suffix, " {tmp}, 0({dst})"), - dst = inout(reg_nonzero) ptr_reg!(dst) => _, - val = in(reg_nonzero) ptr_reg!(val), - tmp = lateout(reg_nonzero) _, + concat!("st", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg_nonzero) ptr_reg!(dst), + val = in(reg) val, options(nostack, preserves_flags), ) }; @@ -219,36 +170,29 @@ macro_rules! atomic { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! swap { ($acquire:tt, $release:tt) => { asm!( - // load from val (ptr) to val (val) - concat!("l", $l_suffix, " {val}, 0({val})"), // (atomic) swap (LL/SC loop) $release, "2:", - // load from dst to tmp - concat!("l", $asm_suffix, "arx {tmp}, 0, {dst}"), + // load from dst to out + concat!("l", $asm_suffix, "arx {out}, 0, {dst}"), // try to store val to dst concat!("st", $asm_suffix, "cx. {val}, 0, {dst}"), "bne %cr0, 2b", $acquire, - // store tmp to out - concat!("st", $asm_suffix, " {tmp}, 0({out})"), dst = in(reg_nonzero) ptr_reg!(dst), - val = inout(reg_nonzero) ptr_reg!(val) => _, - out = in(reg_nonzero) ptr_reg!(out), - tmp = out(reg_nonzero) _, + val = in(reg) val, + out = out(reg) out, out("cr0") _, options(nostack, preserves_flags), ) @@ -256,23 +200,21 @@ macro_rules! 
atomic { } atomic_rmw!(swap, order); } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -280,14 +222,11 @@ macro_rules! atomic { macro_rules! cmpxchg { ($acquire:tt, $release:tt) => { asm!( - // load from old/new (ptr) to old/new (val) - concat!("l", $l_suffix, " {old}, 0({old})"), - concat!("l", $l_suffix, " {new}, 0({new})"), // (atomic) CAS (LL/SC loop) $release, "2:", - concat!("l", $asm_suffix, "arx {tmp}, 0, {dst}"), - concat!("cmp", $cmp_suffix, " {old}, {tmp}"), + concat!("l", $asm_suffix, "arx {out}, 0, {dst}"), + concat!("cmp", $cmp_suffix, " {old}, {out}"), "bne %cr0, 3f", // jump if compare failed concat!("st", $asm_suffix, "cx. {new}, 0, {dst}"), "bne %cr0, 2b", // continue loop if store failed @@ -295,21 +234,18 @@ macro_rules! atomic { // if compare failed EQ bit is cleared, if stqcx succeeds EQ bit is set. "mfcr {r}", $acquire, - // store tmp to out - concat!("st", $asm_suffix, " {tmp}, 0({out})"), dst = in(reg_nonzero) ptr_reg!(dst), - old = inout(reg_nonzero) ptr_reg!(old) => _, - new = inout(reg_nonzero) ptr_reg!(new) => _, - out = inout(reg_nonzero) ptr_reg!(out) => _, - tmp = out(reg_nonzero) _, - r = lateout(reg_nonzero) r, + old = in(reg) crate::utils::zero_extend(old), + new = in(reg) new, + out = out(reg) out, + r = lateout(reg) r, out("cr0") _, options(nostack, preserves_flags), ) }; } atomic_rmw!(cmpxchg, order); - extract_cr0(r) + (out, extract_cr0(r)) } } } @@ -331,14 +267,12 @@ macro_rules! atomic_sub_word { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -348,28 +282,25 @@ macro_rules! atomic_sub_word { macro_rules! swap { ($acquire:tt, $release:tt) => { asm!( - concat!("l", $l_suffix, " {val}, 0({val})"), "slw {mask}, {mask}, {shift}", "slw {val}, {val}, {shift}", "and {val}, {val}, {mask}", // (atomic) swap (LL/SC loop) $release, "2:", - "lwarx {tmp1}, 0, {dst}", - "andc {tmp2}, {tmp1}, {mask}", - "or {tmp2}, {val}, {tmp2}", - "stwcx. {tmp2}, 0, {dst}", + "lwarx {out}, 0, {dst}", + "andc {tmp}, {out}, {mask}", + "or {tmp}, {val}, {tmp}", + "stwcx. 
{tmp}, 0, {dst}", "bne %cr0, 2b", - "srw {tmp1}, {tmp1}, {shift}", + "srw {out}, {out}, {shift}", $acquire, - concat!("st", $asm_suffix, " {tmp1}, 0({out})"), dst = in(reg_nonzero) ptr_reg!(aligned_ptr), - val = inout(reg_nonzero) ptr_reg!(val) => _, - out = in(reg_nonzero) ptr_reg!(out), - shift = in(reg_nonzero) shift, - mask = inout(reg_nonzero) mask => _, - tmp1 = out(reg_nonzero) _, - tmp2 = out(reg_nonzero) _, + val = inout(reg) crate::utils::zero_extend(val) => _, + out = out(reg) out, + shift = in(reg) shift, + mask = inout(reg) mask => _, + tmp = out(reg) _, out("cr0") _, options(nostack, preserves_flags), ) @@ -377,24 +308,22 @@ macro_rules! atomic_sub_word { } atomic_rmw!(swap, order); } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -405,8 +334,6 @@ macro_rules! atomic_sub_word { macro_rules! cmpxchg { ($acquire:tt, $release:tt) => { asm!( - concat!("l", $l_suffix, " {old}, 0({old})"), - concat!("l", $l_suffix, " {new}, 0({new})"), "slw {mask}, {mask}, {shift}", "slw {old}, {old}, {shift}", "slw {new}, {new}, {shift}", @@ -415,36 +342,34 @@ macro_rules! atomic_sub_word { // (atomic) CAS (LL/SC loop) $release, "2:", - "lwarx {tmp2}, 0, {dst}", - "and {tmp1}, {tmp2}, {mask}", - "cmpw {tmp1}, {old}", + "lwarx {tmp}, 0, {dst}", + "and {out}, {tmp}, {mask}", + "cmpw {out}, {old}", "bne %cr0, 3f", - "andc {tmp2}, {tmp2}, {mask}", - "or {tmp2}, {tmp2}, {new}", - "stwcx. {tmp2}, 0, {dst}", + "andc {tmp}, {tmp}, {mask}", + "or {tmp}, {tmp}, {new}", + "stwcx. {tmp}, 0, {dst}", "bne %cr0, 2b", "3:", - "srw {tmp1}, {tmp1}, {shift}", + "srw {out}, {out}, {shift}", // if compare failed EQ bit is cleared, if stqcx succeeds EQ bit is set. "mfcr {r}", $acquire, - concat!("st", $asm_suffix, " {tmp1}, 0({out})"), dst = in(reg_nonzero) ptr_reg!(aligned_ptr), - old = inout(reg_nonzero) ptr_reg!(old) => _, - new = inout(reg_nonzero) ptr_reg!(new) => _, - out = inout(reg_nonzero) ptr_reg!(out) => _, - shift = in(reg_nonzero) shift, - mask = inout(reg_nonzero) mask => _, - r = lateout(reg_nonzero) r, - tmp1 = out(reg_nonzero) _, - tmp2 = out(reg_nonzero) _, + old = inout(reg) crate::utils::zero_extend(old) => _, + new = inout(reg) crate::utils::zero_extend(new) => _, + out = out(reg) out, + shift = in(reg) shift, + mask = inout(reg) mask => _, + r = lateout(reg) r, + tmp = out(reg) _, out("cr0") _, options(nostack, preserves_flags), ) }; } atomic_rmw!(cmpxchg, order); - extract_cr0(r) + (out, extract_cr0(r)) } } } @@ -536,11 +461,10 @@ macro_rules! 
atomic128 { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let (prev_hi, prev_lo); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -550,20 +474,17 @@ macro_rules! atomic128 { // (atomic) load from src to out pair $release, "lq %r4, 0({src})", + // Lightweight acquire sync // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62 "cmpd %cr7, %r4, %r4", "bne- %cr7, 2f", "2:", "isync", - // store out pair to out - concat!("std %r4, ", p128h!(), "({out})"), - concat!("std %r5, ", p128l!(), "({out})"), src = in(reg_nonzero) ptr_reg!(src), - out = inout(reg_nonzero) ptr_reg!(out) => _, // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. - out("r4") _, // out (hi) - out("r5") _, // out (lo) + out("r4") prev_hi, + out("r5") prev_lo, out("cr7") _, options(nostack, preserves_flags), ) @@ -574,22 +495,19 @@ macro_rules! atomic128 { asm!( // (atomic) load from src to out pair "lq %r4, 0({src})", - // store out pair to out - concat!("std %r4, ", p128h!(), "({out})"), - concat!("std %r5, ", p128l!(), "({out})"), src = in(reg_nonzero) ptr_reg!(src), - out = inout(reg_nonzero) ptr_reg!(out) => _, // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. - out("r4") _, // out (hi) - out("r5") _, // out (lo) + out("r4") prev_hi, + out("r5") prev_lo, options(nostack, preserves_flags), - ) + ); } Ordering::Acquire => atomic_load_acquire!(""), Ordering::SeqCst => atomic_load_acquire!("sync"), _ => unreachable!("{:?}", order), } + MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -597,29 +515,25 @@ macro_rules! atomic128 { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit128 { $int_type: val }; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_store { ($release:tt) => { asm!( - // load from val to val pair - concat!("ld %r4, ", p128h!(), "({val})"), - concat!("ld %r5, ", p128l!(), "({val})"), // (atomic) store val pair to dst $release, "stq %r4, 0({dst})", - dst = inout(reg_nonzero) ptr_reg!(dst) => _, - val = in(reg_nonzero) ptr_reg!(val), + dst = in(reg_nonzero) ptr_reg!(dst), // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. - out("r4") _, // val (hi) - lateout("r5") _, // val (lo) + in("r4") val.pair.hi, + in("r5") val.pair.lo, options(nostack, preserves_flags), ) }; @@ -637,49 +551,41 @@ macro_rules! 
atomic128 { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit128 { $int_type: val }; + let (mut prev_hi, mut prev_lo); // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! swap { ($acquire:tt, $release:tt) => { asm!( - // load from val to val pair - concat!("ld %r4, ", p128h!(), "({val})"), - concat!("ld %r5, ", p128l!(), "({val})"), // (atomic) swap (LL/SC loop) $release, "2:", // load from dst to out pair "lqarx %r6, 0, {dst}", // try to store val pair to dst - "stqcx. %r4, 0, {dst}", + "stqcx. %r8, 0, {dst}", "bne %cr0, 2b", $acquire, - // store out pair to out - concat!("std %r6, ", p128h!(), "({out})"), - concat!("std %r7, ", p128l!(), "({out})"), - dst = inout(reg_nonzero) ptr_reg!(dst) => _, - val = in(reg_nonzero) ptr_reg!(val), - out = inout(reg_nonzero) ptr_reg!(out) => _, + dst = in(reg_nonzero) ptr_reg!(dst), // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. - out("r4") _, // val (hi) - lateout("r5") _, // val (lo) - out("r6") _, // out (hi) - out("r7") _, // out (lo) + out("r6") prev_hi, + out("r7") prev_lo, + in("r8") val.pair.hi, + in("r9") val.pair.lo, out("cr0") _, options(nostack, preserves_flags), ) }; } atomic_rmw!(swap, order); + MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -687,17 +593,16 @@ macro_rules! atomic128 { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); + let old = MaybeUninit128 { $int_type: old }; + let new = MaybeUninit128 { $int_type: new }; + let (mut prev_hi, mut prev_lo); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -705,17 +610,12 @@ macro_rules! atomic128 { macro_rules! cmpxchg { ($acquire:tt, $release:tt) => { asm!( - // load from old/new to old/new pairs - concat!("ld %r4, ", p128h!(), "({old})"), - concat!("ld %r5, ", p128l!(), "({old})"), - concat!("ld %r6, ", p128h!(), "({new})"), - concat!("ld %r7, ", p128l!(), "({new})"), // (atomic) CAS (LL/SC loop) $release, "2:", "lqarx %r8, 0, {dst}", - "xor {tmp_lo}, %r9, %r5", - "xor {tmp_hi}, %r8, %r4", + "xor {tmp_lo}, %r9, {old_lo}", + "xor {tmp_hi}, %r8, {old_hi}", "or. {tmp_lo}, {tmp_lo}, {tmp_hi}", "bne %cr0, 3f", // jump if compare failed "stqcx. %r6, 0, {dst}", @@ -724,30 +624,27 @@ macro_rules! atomic128 { // if compare failed EQ bit is cleared, if stqcx succeeds EQ bit is set. 
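// mfcr copies the whole condition register into {tmp_lo}; extract_cr0
// later tests CR0's EQ bit in that value to produce the success flag
// returned alongside the loaded pair.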
"mfcr {tmp_lo}", $acquire, - // store out pair to out - concat!("std %r8, ", p128h!(), "({out})"), - concat!("std %r9, ", p128l!(), "({out})"), - dst = inout(reg_nonzero) ptr_reg!(dst) => _, - old = in(reg_nonzero) ptr_reg!(old), - new = in(reg_nonzero) ptr_reg!(new), - out = inout(reg_nonzero) ptr_reg!(out) => _, - tmp_hi = out(reg_nonzero) _, - tmp_lo = out(reg_nonzero) r, + dst = in(reg_nonzero) ptr_reg!(dst), + old_hi = in(reg) old.pair.hi, + old_lo = in(reg) old.pair.lo, + tmp_hi = out(reg) _, + tmp_lo = out(reg) r, // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. - out("r4") _, // old (hi) - out("r5") _, // old (lo) - out("r6") _, // new (hi) - lateout("r7") _, // new (lo) - lateout("r8") _, // out (hi) - lateout("r9") _, // out (lo) + in("r6") new.pair.hi, + in("r7") new.pair.lo, + out("r8") prev_hi, + out("r9") prev_lo, out("cr0") _, options(nostack, preserves_flags), ) }; } atomic_rmw!(cmpxchg, order); - extract_cr0(r) + ( + MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type, + extract_cr0(r) + ) } } } diff --git a/src/arch/riscv.rs b/src/arch/riscv.rs index fd6875d3..5edfa935 100644 --- a/src/arch/riscv.rs +++ b/src/arch/riscv.rs @@ -11,8 +11,8 @@ // - portable-atomic https://github.com/taiki-e/portable-atomic // // Generated asm: -// - riscv64gc https://godbolt.org/z/nW3Po8n4K -// - riscv32imac https://godbolt.org/z/51nPPMYze +// - riscv64gc https://godbolt.org/z/b8M8njGE5 +// - riscv32imac https://godbolt.org/z/en6ThT6h9 #[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] #[path = "partword.rs"] @@ -84,26 +84,22 @@ macro_rules! atomic_load_store { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_load { ($acquire:tt, $release:tt) => { asm!( - // (atomic) load from src to tmp + // (atomic) load from src to out $release, - concat!("l", $asm_suffix, " {tmp}, 0({src})"), + concat!("l", $asm_suffix, " {out}, 0({src})"), $acquire, - // store tmp to out - concat!("s", $asm_suffix, " {tmp}, 0({out})"), src = in(reg) ptr_reg!(src), - out = inout(reg) ptr_reg!(out) => _, - tmp = lateout(reg) _, + out = lateout(reg) out, options(nostack, preserves_flags), ) }; @@ -115,31 +111,28 @@ macro_rules! atomic_load_store { _ => unreachable!("{:?}", order), } } + out } } impl AtomicStore for $int_type { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! 
atomic_store { ($release:tt) => { asm!( - // load from val to tmp - concat!("l", $asm_suffix, " {tmp}, 0({val})"), - // (atomic) store tmp to dst + // (atomic) store val to dst $release, - concat!("s", $asm_suffix, " {tmp}, 0({dst})"), - dst = inout(reg) ptr_reg!(dst) => _, - val = in(reg) ptr_reg!(val), - tmp = lateout(reg) _, + concat!("s", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg) ptr_reg!(dst), + val = in(reg) val, options(nostack, preserves_flags), ) }; @@ -164,37 +157,31 @@ macro_rules! atomic { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! swap { ($order:tt) => { asm!( - // load from val (ptr) to val (val) - concat!("l", $asm_suffix, " {val}, 0({val})"), // (atomic) swap (AMO) - // - load value from dst and store it to tmp + // - load value from dst and store it to out // - store value of val to dst - concat!("amoswap.", $asm_suffix, $order, " {tmp}, {val}, 0({dst})"), - // store tmp to out - concat!("s", $asm_suffix, " {tmp}, 0({out})"), + concat!("amoswap.", $asm_suffix, $order, " {out}, {val}, 0({dst})"), dst = in(reg) ptr_reg!(dst), - val = inout(reg) ptr_reg!(val) => _, - out = inout(reg) ptr_reg!(out) => _, - tmp = lateout(reg) _, + val = inout(reg) val => _, + out = lateout(reg) out, options(nostack, preserves_flags), ) }; } atomic_rmw_amo!(swap, order); } + out } } #[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] @@ -202,17 +189,14 @@ macro_rules! atomic { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -220,25 +204,19 @@ macro_rules! atomic { macro_rules! 
cmpxchg { ($acquire:tt, $release:tt) => { asm!( - // load from old/new (ptr) to old/new (val) - concat!("l", $asm_suffix, " {old}, 0({old})"), - concat!("l", $asm_suffix, " {new}, 0({new})"), // (atomic) CAS (LR/SC loop) "2:", - concat!("lr.", $asm_suffix, $acquire, " {tmp}, 0({dst})"), - "bne {tmp}, {old}, 3f", // compare and jump if compare failed + concat!("lr.", $asm_suffix, $acquire, " {out}, 0({dst})"), + "bne {out}, {old}, 3f", // compare and jump if compare failed concat!("sc.", $asm_suffix, $release, " {r}, {new}, 0({dst})"), "bnez {r}, 2b", // continue loop if store failed "3:", - "xor {r}, {tmp}, {old}", + "xor {r}, {out}, {old}", "seqz {r}, {r}", - // store tmp to out - concat!("s", $asm_suffix, " {tmp}, 0({out})"), dst = in(reg) ptr_reg!(dst), - old = inout(reg) ptr_reg!(old) => _, - new = inout(reg) ptr_reg!(new) => _, - out = in(reg) ptr_reg!(out), - tmp = out(reg) _, + old = in(reg) old, + new = in(reg) new, + out = out(reg) out, r = out(reg) r, options(nostack, preserves_flags), ) @@ -246,7 +224,7 @@ macro_rules! atomic { } atomic_rmw_lr_sc!(cmpxchg, order); debug_assert!(r == 0 || r == 1, "r={}", r); - r != 0 + (out, r != 0) } } } @@ -262,14 +240,12 @@ macro_rules! atomic_sub_word { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -279,33 +255,31 @@ macro_rules! atomic_sub_word { macro_rules! swap { ($acquire:tt, $release:tt) => { asm!( - concat!("l", $asm_suffix, "u {val}, 0({val})"), concat!("sll", w!(), " {mask}, {mask}, {shift}"), concat!("sll", w!(), " {val}, {val}, {shift}"), // (atomic) swap (LR/SC loop) "2:", - concat!("lr.w", $acquire, " {tmp1}, 0({dst})"), - "mv {tmp2}, {val}", - "xor {tmp2}, {tmp2}, {tmp1}", - "and {tmp2}, {tmp2}, {mask}", - "xor {tmp2}, {tmp2}, {tmp1}", - concat!("sc.w", $release, " {tmp2}, {tmp2}, 0({dst})"), - "bnez {tmp2}, 2b", - concat!("srl", w!(), " {tmp1}, {tmp1}, {shift}"), - concat!("s", $asm_suffix, " {tmp1}, 0({out})"), + concat!("lr.w", $acquire, " {out}, 0({dst})"), + "mv {tmp}, {val}", + "xor {tmp}, {tmp}, {out}", + "and {tmp}, {tmp}, {mask}", + "xor {tmp}, {tmp}, {out}", + concat!("sc.w", $release, " {tmp}, {tmp}, 0({dst})"), + "bnez {tmp}, 2b", + concat!("srl", w!(), " {out}, {out}, {shift}"), dst = in(reg) ptr_reg!(aligned_ptr), - val = inout(reg) ptr_reg!(val) => _, - out = in(reg) ptr_reg!(out), + val = inout(reg) crate::utils::zero_extend(val) => _, + out = out(reg) out, shift = in(reg) shift, mask = inout(reg) mask => _, - tmp1 = out(reg) _, - tmp2 = out(reg) _, + tmp = out(reg) _, options(nostack, preserves_flags), ) }; } atomic_rmw_lr_sc!(swap, order); } + out } } #[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] @@ -313,18 +287,15 @@ macro_rules! 
atomic_sub_word { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, success: Ordering, failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let order = crate::utils::upgrade_success_ordering(success, failure); let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -335,43 +306,39 @@ macro_rules! atomic_sub_word { macro_rules! cmpxchg { ($acquire:tt, $release:tt) => { asm!( - concat!("l", $asm_suffix, "u {old}, 0({old})"), - concat!("l", $asm_suffix, "u {new}, 0({new})"), concat!("sll", w!(), " {mask}, {mask}, {shift}"), concat!("sll", w!(), " {old}, {old}, {shift}"), concat!("sll", w!(), " {new}, {new}, {shift}"), // (atomic) CAS (LR/SC loop) "2:", - concat!("lr.w", $acquire, " {tmp1}, 0({dst})"), - "and {tmp2}, {tmp1}, {mask}", - "bne {tmp2}, {old}, 3f", - "xor {tmp2}, {tmp1}, {new}", - "and {tmp2}, {tmp2}, {mask}", - "xor {tmp2}, {tmp2}, {tmp1}", - concat!("sc.w", $release, " {tmp2}, {tmp2}, 0({dst})"), - "bnez {tmp2}, 2b", + concat!("lr.w", $acquire, " {tmp}, 0({dst})"), + "and {out}, {tmp}, {mask}", + "bne {out}, {old}, 3f", + "xor {out}, {tmp}, {new}", + "and {out}, {out}, {mask}", + "xor {out}, {out}, {tmp}", + concat!("sc.w", $release, " {out}, {out}, 0({dst})"), + "bnez {out}, 2b", "3:", - concat!("srl", w!(), " {tmp2}, {tmp1}, {shift}"), - "and {tmp1}, {tmp1}, {mask}", - "xor {r}, {old}, {tmp1}", + concat!("srl", w!(), " {out}, {tmp}, {shift}"), + "and {tmp}, {tmp}, {mask}", + "xor {r}, {old}, {tmp}", "seqz {r}, {r}", - concat!("s", $asm_suffix, " {tmp2}, 0({out})"), dst = in(reg) ptr_reg!(aligned_ptr), - old = inout(reg) ptr_reg!(old) => _, - new = inout(reg) ptr_reg!(new) => _, - out = inout(reg) ptr_reg!(out) => _, + old = inout(reg) crate::utils::zero_extend(old) => _, + new = inout(reg) crate::utils::zero_extend(new) => _, + out = out(reg) out, shift = in(reg) shift, mask = inout(reg) mask => _, r = lateout(reg) r, - tmp1 = out(reg) _, - tmp2 = out(reg) _, + tmp = out(reg) _, options(nostack, preserves_flags), ) }; } atomic_rmw_lr_sc!(cmpxchg, order); debug_assert!(r == 0 || r == 1, "r={}", r); - r != 0 + (out, r != 0) } } } diff --git a/src/arch/s390x.rs b/src/arch/s390x.rs index 899c3f70..415c6c09 100644 --- a/src/arch/s390x.rs +++ b/src/arch/s390x.rs @@ -8,8 +8,8 @@ // - portable-atomic https://github.com/taiki-e/portable-atomic // // Generated asm: -// - s390x https://godbolt.org/z/qv8s6o13G -// - s390x (z196) https://godbolt.org/z/jW67E4YEq +// - s390x https://godbolt.org/z/WMa8541M5 +// - s390x (z196) https://godbolt.org/z/86d5fPhEW #[path = "partword.rs"] mod partword; @@ -20,7 +20,10 @@ use core::{ sync::atomic::Ordering, }; -use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; +use crate::{ + raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}, + utils::{MaybeUninit128, Pair}, +}; type XSize = u64; @@ -43,11 +46,10 @@ macro_rules! 
atomic_load_store { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -55,39 +57,33 @@ macro_rules! atomic_load_store { asm!( // (atomic) load from src to r0 concat!("l", $l_suffix, " %r0, 0({src})"), - // store r0 to out - concat!("st", $asm_suffix, " %r0, 0({out})"), src = in(reg) ptr_reg!(src), - out = in(reg) ptr_reg!(out), - out("r0") _, + out("r0") out, options(nostack, preserves_flags), ); } + out } } impl AtomicStore for $int_type { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_store { ($fence:tt) => { asm!( - // load from val to r0 - concat!("l", $l_suffix, " %r0, 0({val})"), // (atomic) store r0 to dst concat!("st", $asm_suffix, " %r0, 0({dst})"), $fence, dst = in(reg) ptr_reg!(dst), - val = in(reg) ptr_reg!(val), - out("r0") _, + in("r0") val, options(nostack, preserves_flags), ) }; @@ -121,77 +117,60 @@ macro_rules! atomic { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { // atomic swap is always SeqCst. asm!( - // load from val to val_tmp - concat!("l", $asm_suffix, " {val_tmp}, 0({val})"), // (atomic) swap (CAS loop) concat!("l", $asm_suffix, " %r0, 0({dst})"), "2:", - concat!("cs", $asm_suffix, " %r0, {val_tmp}, 0({dst})"), + concat!("cs", $asm_suffix, " %r0, {val}, 0({dst})"), "jl 2b", - // store r0 to out - concat!("st", $asm_suffix, " %r0, 0({out})"), dst = in(reg) ptr_reg!(dst), - val = in(reg) ptr_reg!(val), - val_tmp = out(reg) _, - out = in(reg) ptr_reg!(out), - out("r0") _, + val = in(reg) val, + out("r0") out, // Do not use `preserves_flags` because CS modifies the condition code. options(nostack), ); } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { let mut r: i64; // compare_exchange is always SeqCst. 
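// CS compares %r0 (old) with the value at dst: on match it stores {new}
// and sets condition code 0, otherwise it loads the current value into
// %r0 and sets condition code 1. IPM captures the condition code for
// extract_cc, and %r0 doubles as the returned previous value (out).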
asm!( - // load from old/new to r0/tmp - concat!("l", $asm_suffix, " %r0, 0({old})"), - concat!("l", $asm_suffix, " {tmp}, 0({new})"), // (atomic) CAS - concat!("cs", $asm_suffix, " %r0, {tmp}, 0({dst})"), + concat!("cs", $asm_suffix, " %r0, {new}, 0({dst})"), // store condition code - "ipm {tmp}", - // store r0 to out - concat!("st", $asm_suffix, " %r0, 0({out})"), + "ipm {r}", dst = in(reg) ptr_reg!(dst), - old = in(reg) ptr_reg!(old), - new = in(reg) ptr_reg!(new), - tmp = out(reg) r, - out = in(reg) ptr_reg!(out), - out("r0") _, + new = in(reg) new, + r = lateout(reg) r, + inout("r0") old => out, // Do not use `preserves_flags` because CS modifies the condition code. options(nostack), ); - extract_cc(r) + (out, extract_cc(r)) } } } @@ -205,14 +184,12 @@ macro_rules! atomic_sub_word { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let (aligned_ptr, shift, _mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -220,44 +197,40 @@ macro_rules! atomic_sub_word { // Based on assemblies generated by rustc/LLVM. // See also partword.rs. asm!( - concat!("l", $l_suffix, " %r0, 0(%r3)"), - "l %r3, 0({dst})", + "l %r0, 0({dst})", "2:", - "rll %r14, %r3, 0({shift})", - concat!("risbg %r14, %r0, 32, ", $risbg_swap), - "rll %r14, %r14, 0({shift_c})", - "cs %r3, %r14, 0({dst})", + "rll {tmp}, %r0, 0({shift})", + concat!("risbg {tmp}, {val}, 32, ", $risbg_swap), + "rll {tmp}, {tmp}, 0({shift_c})", + "cs %r0, {tmp}, 0({dst})", "jl 2b", - concat!("rll %r0, %r3, ", $bits ,"({shift})"), - concat!("st", $asm_suffix, " %r0, 0({out})"), + concat!("rll {out}, %r0, ", $bits ,"({shift})"), dst = in(reg) ptr_reg!(aligned_ptr), - out = in(reg) ptr_reg!(out), + val = in(reg) val, + out = lateout(reg) out, shift = in(reg) shift as u32, shift_c = in(reg) complement(shift as u32), - out("r0") _, - inout("r3") ptr_reg!(val) => _, - out("r14") _, + tmp = out(reg) _, + out("r0") _, // prev // Do not use `preserves_flags` because CS modifies the condition code. options(nostack), ); } + out } } impl AtomicCompareExchange for $int_type { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); let (aligned_ptr, shift, _mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { @@ -266,35 +239,32 @@ macro_rules! atomic_sub_word { // Based on assemblies generated by rustc/LLVM. // See also partword.rs. 
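// Sketch of the strategy below: RLL rotates the aligned word so the
// target byte/halfword sits at a fixed position, RISBG splices the new
// sub-word value together with the untouched neighboring bits, and CS
// retries on the whole word; %r0 ends up holding the zero-extended
// previous sub-word value, which becomes out.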
asm!( - concat!("ll", $asm_suffix, " %r0, 0(%r3)"), - concat!("l", $l_suffix, " %r1, 0(%r4)"), - "l %r4, 0({dst})", + "l {prev}, 0({dst})", "2:", - concat!("rll %r13, %r4, ", $bits ,"({shift})"), - concat!("risbg %r1, %r13, 32, ", $risbg_cas, ", 0"), - concat!("ll", $asm_suffix, "r %r13, %r13"), - "cr %r13, %r0", + concat!("rll %r0, {prev}, ", $bits ,"({shift})"), + concat!("risbg {new}, %r0, 32, ", $risbg_cas, ", 0"), + concat!("ll", $asm_suffix, "r %r0, %r0"), + "cr %r0, {old}", "jlh 3f", - concat!("rll %r3, %r1, -", $bits ,"({shift_c})"), - "cs %r4, %r3, 0({dst})", + concat!("rll {tmp}, {new}, -", $bits ,"({shift_c})"), + "cs {prev}, {tmp}, 0({dst})", "jl 2b", "3:", // store condition code - "ipm %r0", - concat!("st", $asm_suffix, " %r13, 0({out})"), + "ipm {r}", dst = in(reg) ptr_reg!(aligned_ptr), - out = in(reg) ptr_reg!(out), + prev = out(reg) _, + old = in(reg) crate::utils::zero_extend(old), + new = inout(reg) new => _, shift = in(reg) shift as u32, shift_c = in(reg) complement(shift as u32), - out("r0") r, - out("r1") _, - inout("r3") ptr_reg!(old) => _, - inout("r4") ptr_reg!(new) => _, - out("r13") _, + tmp = out(reg) _, + r = lateout(reg) r, + out("r0") out, // Do not use `preserves_flags` because CS modifies the condition code. options(nostack), ); - extract_cc(r) + (out, extract_cc(r)) } } } @@ -319,11 +289,10 @@ macro_rules! atomic128 { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let (prev_hi, prev_lo); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -331,16 +300,13 @@ macro_rules! atomic128 { asm!( // (atomic) load from src to out pair "lpq %r0, 0({src})", - // store out pair to out - "stg %r1, 8({out})", - "stg %r0, 0({out})", src = in(reg) ptr_reg!(src), - out = in(reg) ptr_reg!(out), // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. - out("r0") _, // out (hi) - out("r1") _, // out (lo) + out("r0") prev_hi, + out("r1") prev_lo, options(nostack, preserves_flags), ); + MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -348,28 +314,24 @@ macro_rules! atomic128 { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit128 { $int_type: val }; // SAFETY: the caller must uphold the safety contract. unsafe { macro_rules! atomic_store { ($fence:tt) => { asm!( - // load from val to val pair - "lg %r1, 8({val})", - "lg %r0, 0({val})", // (atomic) store val pair to dst "stpq %r0, 0({dst})", $fence, dst = in(reg) ptr_reg!(dst), - val = in(reg) ptr_reg!(val), // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. - out("r0") _, // val (hi) - out("r1") _, // val (lo) + in("r0") val.pair.hi, + in("r1") val.pair.lo, options(nostack, preserves_flags), ) }; @@ -397,40 +359,32 @@ macro_rules! 
atomic128 { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit128 { $int_type: val }; + let (mut prev_hi, mut prev_lo); // SAFETY: the caller must uphold the safety contract. unsafe { // atomic swap is always SeqCst. asm!( - // load from val to val pair - "lg %r1, 8({val})", - "lg %r0, 0({val})", // (atomic) swap (CAS loop) - "lpq %r2, 0({dst})", + "lpq %r0, 0({dst})", "2:", - "cdsg %r2, %r0, 0({dst})", + "cdsg %r0, %r12, 0({dst})", "jl 2b", - // store out pair to out - "stg %r3, 8({out})", - "stg %r2, 0({out})", dst = inout(reg) ptr_reg!(dst) => _, - val = in(reg) ptr_reg!(val), - out = inout(reg) ptr_reg!(out) => _, // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. - out("r0") _, // val (hi) - out("r1") _, // val (lo) - lateout("r2") _, // out (hi) - lateout("r3") _, // out (lo) + out("r0") prev_hi, + out("r1") prev_lo, + in("r12") val.pair.hi, + in("r13") val.pair.lo, // Do not use `preserves_flags` because CDSG modifies the condition code. options(nostack), ); + MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -438,48 +392,39 @@ macro_rules! atomic128 { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let old = MaybeUninit128 { $int_type: old }; + let new = MaybeUninit128 { $int_type: new }; + let (prev_hi, prev_lo); // SAFETY: the caller must uphold the safety contract. unsafe { let mut r: i64; // compare_exchange is always SeqCst. asm!( - // load from old/new to old/new pairs - "lg %r1, 8({old})", - "lg %r0, 0({old})", - "lg %r13, 8({new})", - "lg %r12, 0({new})", // (atomic) CAS "cdsg %r0, %r12, 0({dst})", // store condition code "ipm {r}", - // store out pair to out - "stg %r1, 8({out})", - "stg %r0, 0({out})", dst = in(reg) ptr_reg!(dst), - old = in(reg) ptr_reg!(old), - new = in(reg) ptr_reg!(new), - out = inout(reg) ptr_reg!(out) => _, r = lateout(reg) r, // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. - out("r0") _, // old (hi) -> out (hi) - out("r1") _, // old (lo) -> out (lo) - out("r12") _, // new (hi) - out("r13") _, // new (hi) + inout("r0") old.pair.hi => prev_hi, + inout("r1") old.pair.lo => prev_lo, + in("r12") new.pair.hi, + in("r13") new.pair.lo, // Do not use `preserves_flags` because CDSG modifies the condition code. 
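// CDSG compares the r0/r1 pair with the quadword at dst and, on match,
// stores the pair named by its third operand, so the swap value must
// also sit in an even/odd pair (r12/r13 here); on mismatch it refreshes
// r0/r1 from memory and `jl 2b` retries.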
options(nostack), ); - extract_cc(r) + ( + MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type, + extract_cc(r) + ) } } } diff --git a/src/arch/x86.rs b/src/arch/x86.rs index 39b1fc2b..ed85134c 100644 --- a/src/arch/x86.rs +++ b/src/arch/x86.rs @@ -7,14 +7,14 @@ // - portable-atomic https://github.com/taiki-e/portable-atomic // // Generated asm: -// - x86_64 https://godbolt.org/z/fvqWGT5E6 -// - x86_64 (+cmpxchg16b) https://godbolt.org/z/fGdj8naT9 -// - x86 (i686) https://godbolt.org/z/9jKcboaoG -// - x86 (i686,-sse2) https://godbolt.org/z/sjYK57r96 -// - x86 (i586) https://godbolt.org/z/5rrzYGxPe -// - x86 (i586,-x87) https://godbolt.org/z/GvcdhqxYo -// - x86 (i486) https://godbolt.org/z/nPaGY4oEM -// - x86 (i386) https://godbolt.org/z/YWEc63Kac +// - x86_64 https://godbolt.org/z/4j1brEoKq +// - x86_64 (+cmpxchg16b) https://godbolt.org/z/6j3oTKMn7 +// - x86 (i686) https://godbolt.org/z/Ehx43oKef +// - x86 (i686,-sse2) https://godbolt.org/z/PzdGrvM98 +// - x86 (i586) https://godbolt.org/z/4cj5qcGo9 +// - x86 (i586,-x87) https://godbolt.org/z/41vGvY3j6 +// - x86 (i486) https://godbolt.org/z/a5YjTPvxd +// - x86 (i386) https://godbolt.org/z/ovoYPW8jc use core::{ arch::asm, @@ -23,6 +23,12 @@ use core::{ }; use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; +#[cfg(target_arch = "x86_64")] +#[cfg(any(target_feature = "cmpxchg16b", atomic_maybe_uninit_target_feature = "cmpxchg16b"))] +use crate::utils::{MaybeUninit128, Pair}; +#[cfg(target_arch = "x86")] +#[cfg(not(atomic_maybe_uninit_no_cmpxchg8b))] +use crate::utils::{MaybeUninit64, Pair}; #[cfg(target_pointer_width = "32")] macro_rules! ptr_modifier { @@ -37,62 +43,39 @@ macro_rules! ptr_modifier { }; } -#[cfg(target_arch = "x86")] -#[cfg(not(atomic_maybe_uninit_no_cmpxchg8b))] -#[cfg(target_feature = "sse")] -#[cfg(target_feature = "sse2")] -macro_rules! if_sse2 { - ($then:expr, $else:expr) => { - $then - }; -} -#[cfg(target_arch = "x86")] -#[cfg(not(atomic_maybe_uninit_no_cmpxchg8b))] -#[cfg(target_feature = "sse")] -#[cfg(not(target_feature = "sse2"))] -macro_rules! if_sse2 { - ($then:expr, $else:expr) => { - $else - }; -} - macro_rules! atomic { ($int_type:ident, $val_reg:tt, $val_modifier:tt, $ptr_size:tt, $cmpxchg_cmp_reg:tt) => { impl AtomicLoad for $int_type { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { // atomic load is always SeqCst. 
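// An aligned, register-sized mov load is already atomic on x86 and is
// strong enough for SeqCst (the fence cost is paid on the store side via
// xchg), so a single mov from src into the out register suffices.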
asm!( - // (atomic) load from src to tmp - concat!("mov {tmp", $val_modifier, "}, ", $ptr_size, " ptr [{src", ptr_modifier!(), "}]"), - // store tmp to out - concat!("mov ", $ptr_size, " ptr [{out", ptr_modifier!(), "}], {tmp", $val_modifier, "}"), + // (atomic) load from src to out + concat!("mov {out", $val_modifier, "}, ", $ptr_size, " ptr [{src", ptr_modifier!(), "}]"), src = in(reg) src, - out = inout(reg) out => _, - tmp = lateout($val_reg) _, + out = lateout($val_reg) out, options(nostack, preserves_flags), ); } + out } } impl AtomicStore for $int_type { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -100,25 +83,19 @@ macro_rules! atomic { // Relaxed and Release stores are equivalent. Ordering::Relaxed | Ordering::Release => { asm!( - // load from val to tmp - concat!("mov {tmp", $val_modifier, "}, ", $ptr_size, " ptr [{val", ptr_modifier!(), "}]"), - // (atomic) store tmp to dst - concat!("mov ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {tmp", $val_modifier, "}"), - dst = inout(reg) dst => _, - val = in(reg) val, - tmp = lateout($val_reg) _, + // (atomic) store val to dst + concat!("mov ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {val", $val_modifier, "}"), + dst = in(reg) dst, + val = in($val_reg) val, options(nostack, preserves_flags), ); } Ordering::SeqCst => { asm!( - // load from val to tmp - concat!("mov {tmp", $val_modifier, "}, ", $ptr_size, " ptr [{val", ptr_modifier!(), "}]"), - // (atomic) store tmp to dst (SeqCst store is xchg, not mov) - concat!("xchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {tmp", $val_modifier, "}"), - dst = inout(reg) dst => _, - val = in(reg) val, - tmp = lateout($val_reg) _, + // (atomic) store val to dst (SeqCst store is xchg, not mov) + concat!("xchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {val", $val_modifier, "}"), + dst = in(reg) dst, + val = inout($val_reg) val => _, options(nostack, preserves_flags), ); } @@ -131,31 +108,24 @@ macro_rules! atomic { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. unsafe { // atomic swap is always SeqCst. asm!( - // load from val to tmp - concat!("mov {tmp", $val_modifier, "}, ", $ptr_size, " ptr [{val", ptr_modifier!(), "}]"), - // (atomic) swap tmp and dst - concat!("xchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {tmp", $val_modifier, "}"), - // store tmp to out - concat!("mov ", $ptr_size, " ptr [{out", ptr_modifier!(), "}], {tmp", $val_modifier, "}"), - dst = inout(reg) dst => _, - val = in(reg) val, - out = inout(reg) out => _, - tmp = lateout($val_reg) _, + // (atomic) swap val and dst + concat!("xchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {val", $val_modifier, "}"), + dst = in(reg) dst, + val = inout($val_reg) val => out, options(nostack, preserves_flags), ); } + out } } #[cfg(not(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg)))] @@ -163,16 +133,13 @@ macro_rules! 
atomic { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let out: MaybeUninit; // SAFETY: the caller must uphold the safety contract. // @@ -181,30 +148,22 @@ macro_rules! atomic { let r: u8; // compare_exchange is always SeqCst. asm!( - // load from old/new to $cmpxchg_cmp_reg/tmp_new - concat!("mov ", $cmpxchg_cmp_reg, ", ", $ptr_size, " ptr [{old", ptr_modifier!(), "}]"), - concat!("mov {tmp_new", $val_modifier, "}, ", $ptr_size, " ptr [{new", ptr_modifier!(), "}]"), // (atomic) CAS // - Compare $cmpxchg_cmp_reg with dst. - // - If equal, ZF is set and tmp_new is loaded into dst. + // - If equal, ZF is set and new is loaded into dst. // - Else, clear ZF and load dst into $cmpxchg_cmp_reg. - concat!("lock cmpxchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {tmp_new", $val_modifier, "}"), + concat!("lock cmpxchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {new", $val_modifier, "}"), // load ZF to r "sete {r}", - // store $cmpxchg_cmp_reg to out - concat!("mov ", $ptr_size, " ptr [{out", ptr_modifier!(), "}], ", $cmpxchg_cmp_reg, ""), dst = in(reg) dst, - old = in(reg) old, - new = in(reg) new, - out = in(reg) out, - tmp_new = out($val_reg) _, + new = in($val_reg) new, r = out(reg_byte) r, - out($cmpxchg_cmp_reg) _, - // Do not use `preserves_flags` because CMPXCHG modifies the ZF flag. + inout($cmpxchg_cmp_reg) old => out, + // Do not use `preserves_flags` because CMPXCHG modifies the ZF, CF, PF, AF, SF, and OF flags. options(nostack), ); debug_assert!(r == 0 || r == 1, "r={}", r); - r != 0 + (out, r != 0) } } } @@ -240,16 +199,15 @@ macro_rules! atomic64 { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); #[cfg(target_feature = "sse")] // SAFETY: the caller must uphold the safety contract. // cfg guarantees that the CPU supports SSE. unsafe { + let mut out: MaybeUninit; #[cfg(target_feature = "sse2")] { // atomic load is always SeqCst. @@ -257,18 +215,10 @@ macro_rules! atomic64 { // Refs: // - https://www.felixcloutier.com/x86/movq (SSE2) // - https://www.felixcloutier.com/x86/movd:movq (SSE2) - // - https://www.felixcloutier.com/x86/pshufd (SSE2) - // (atomic) load from src to tmp0 - "movq {tmp0}, qword ptr [{src}]", - // extract lower 64-bits - "pshufd {tmp1}, {tmp0}, 85", - // store tmp0/tmp1 to out - "movd dword ptr [{out}], {tmp0}", - "movd dword ptr [{out} + 4], {tmp1}", + // (atomic) load from src to out + "movq {out}, qword ptr [{src}]", src = in(reg) src, - out = in(reg) out, - tmp0 = out(xmm_reg) _, - tmp1 = out(xmm_reg) _, + out = out(xmm_reg) out, options(nostack, preserves_flags), ); } @@ -279,47 +229,36 @@ macro_rules! 
atomic64 { // Refs: // - https://www.felixcloutier.com/x86/xorps (SSE) // - https://www.felixcloutier.com/x86/movlps (SSE) - // - https://www.felixcloutier.com/x86/movss (SSE) - // - https://www.felixcloutier.com/x86/shufps (SSE) - "xorps {tmp}, {tmp}", - // (atomic) load from src to tmp - "movlps {tmp}, qword ptr [{src}]", - // store tmp to out - "movss dword ptr [{out}], {tmp}", - "shufps {tmp}, {tmp}, 85", - "movss dword ptr [{out} + 4], {tmp}", + "xorps {out}, {out}", + // (atomic) load from src to out + "movlps {out}, qword ptr [{src}]", src = in(reg) src, - out = in(reg) out, - tmp = out(xmm_reg) _, + out = out(xmm_reg) out, options(nostack, preserves_flags), ); } + core::mem::transmute::<_, [MaybeUninit; 2]>(out)[0] } #[cfg(not(target_feature = "sse"))] // SAFETY: the caller must uphold the safety contract. // // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b unsafe { + let (prev_lo, prev_hi); // atomic load is always SeqCst. asm!( - // esi is reserved by LLVM - "xchg {esi_tmp}, esi", // (atomic) load by cmpxchg(0, 0) "lock cmpxchg8b qword ptr [edi]", - // store current value to out - "mov dword ptr [esi], eax", - "mov dword ptr [esi + 4], edx", - "mov esi, {esi_tmp}", // restore esi - esi_tmp = inout(reg) out => _, // set old/new args of cmpxchg8b to 0 - inout("eax") 0_u32 => _, - inout("edx") 0_u32 => _, in("ebx") 0_u32, in("ecx") 0_u32, + inout("eax") 0_u32 => prev_lo, + inout("edx") 0_u32 => prev_hi, in("edi") src, // Do not use `preserves_flags` because CMPXCHG8B modifies the ZF flag. options(nostack), ); + MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -327,11 +266,10 @@ macro_rules! atomic64 { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); #[cfg(target_feature = "sse")] // SAFETY: the caller must uphold the safety contract. @@ -339,38 +277,30 @@ macro_rules! atomic64 { // // Refs: // - https://www.felixcloutier.com/x86/movlps (SSE) - // - https://www.felixcloutier.com/x86/xorps (SSE) - // - https://www.felixcloutier.com/x86/movsd (SSE2) // - https://www.felixcloutier.com/x86/lock // - https://www.felixcloutier.com/x86/or unsafe { + let val: MaybeUninit + = core::mem::transmute([val, MaybeUninit::uninit()]); match order { // Relaxed and Release stores are equivalent. 
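// Illustrative note (not part of this diff) on the "load by cmpxchg(0, 0)" path above:
// on 32-bit x86 without SSE, a 64-bit atomic load is done by attempting a CAS of 0 -> 0.
// If the location held 0, the CAS stores 0 back (no visible change); otherwise it fails,
// and either way eax:edx ends up holding the observed value. This is also why the
// operation needs the location to be valid for writes, not just reads. A hedged sketch
// of the same idea in terms of a plain AtomicU64 (not this crate's API):
//
// fn load_via_cas_zero(v: &core::sync::atomic::AtomicU64) -> u64 {
//     use core::sync::atomic::Ordering::SeqCst;
//     match v.compare_exchange(0, 0, SeqCst, SeqCst) {
//         // Success or failure, we get back the value that was observed.
//         Ok(prev) | Err(prev) => prev,
//     }
// }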
Ordering::Relaxed | Ordering::Release => { asm!( - if_sse2!("", "xorps {tmp}, {tmp}"), - // load from val to tmp - if_sse2!("movsd {tmp}, qword ptr [{val}]", "movlps {tmp}, qword ptr [{val}]"), - // (atomic) store tmp to dst - "movlps qword ptr [{dst}], {tmp}", + // (atomic) store val to dst + "movlps qword ptr [{dst}], {val}", dst = in(reg) dst, - val = in(reg) val, - tmp = out(xmm_reg) _, + val = in(xmm_reg) val, options(nostack, preserves_flags), ); } Ordering::SeqCst => { let p = core::cell::UnsafeCell::new(0_u32); asm!( - // load from val to tmp - if_sse2!("", "xorps {tmp}, {tmp}"), - if_sse2!("movsd {tmp}, qword ptr [{val}]", "movlps {tmp}, qword ptr [{val}]"), - // (atomic) store tmp to dst - "movlps qword ptr [{dst}], {tmp}", + // (atomic) store val to dst + "movlps qword ptr [{dst}], {val}", "lock or dword ptr [{p}], 0", // equivalent to mfence, but doesn't require SSE2 dst = in(reg) dst, - val = in(reg) val, - tmp = out(xmm_reg) _, + val = in(xmm_reg) val, p = in(reg) p.get(), // Do not use `preserves_flags` because OR modifies the OF, CF, SF, ZF, and PF flags. options(nostack), @@ -384,11 +314,10 @@ macro_rules! atomic64 { // // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b unsafe { + let val = MaybeUninit64 { $int_type: val }; // atomic store is always SeqCst. let _ = order; asm!( - "mov ebx, dword ptr [eax]", - "mov ecx, dword ptr [eax + 4]", // This is based on the code generated for the first load in DW RMWs by LLVM, // but it is interesting that they generate code that does mixed-sized atomic access. // @@ -400,10 +329,10 @@ macro_rules! atomic64 { "2:", "lock cmpxchg8b qword ptr [edi]", "jne 2b", - inout("eax") val => _, + in("ebx") val.pair.lo, + in("ecx") val.pair.hi, + out("eax") _, out("edx") _, - out("ebx") _, - out("ecx") _, in("edi") dst, // Do not use `preserves_flags` because CMPXCHG8B modifies the ZF flag. options(nostack), @@ -415,13 +344,12 @@ macro_rules! atomic64 { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit64 { $int_type: val }; + let (mut prev_lo, mut prev_hi); // SAFETY: the caller must uphold the safety contract. // @@ -429,10 +357,6 @@ macro_rules! atomic64 { unsafe { // atomic store is always SeqCst. asm!( - // esi is reserved by LLVM - "xchg {esi_tmp}, esi", - "mov ebx, dword ptr [eax]", - "mov ecx, dword ptr [eax + 4]", // This is based on the code generated for the first load in DW RMWs by LLVM, // but it is interesting that they generate code that does mixed-sized atomic access. // @@ -444,19 +368,15 @@ macro_rules! atomic64 { "2:", "lock cmpxchg8b qword ptr [edi]", "jne 2b", - // store previous value to out - "mov dword ptr [esi], eax", - "mov dword ptr [esi + 4], edx", - "mov esi, {esi_tmp}", // restore esi - esi_tmp = inout(reg) out => _, - inout("eax") val => _, - out("edx") _, - out("ebx") _, - out("ecx") _, + in("ebx") val.pair.lo, + in("ecx") val.pair.hi, + out("eax") prev_lo, + out("edx") prev_hi, in("edi") dst, // Do not use `preserves_flags` because CMPXCHG8B modifies the ZF flag. options(nostack), ); + MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -464,48 +384,39 @@ macro_rules! 
atomic64 { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let old = MaybeUninit64 { $int_type: old }; + let new = MaybeUninit64 { $int_type: new }; + let (prev_lo, prev_hi); // SAFETY: the caller must uphold the safety contract. // - // Refs: https://www.felixcloutier.com/x86/cmpxchg + // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b unsafe { - let mut r: u32; + let r: u32; // compare_exchange is always SeqCst. asm!( - // esi is reserved by LLVM - "xchg {esi_tmp}, esi", - "mov eax, dword ptr [edx]", - "mov edx, dword ptr [edx + 4]", - "mov ebx, dword ptr [ecx]", - "mov ecx, dword ptr [ecx + 4]", // (atomic) CAS "lock cmpxchg8b qword ptr [edi]", "sete cl", - // store previous value to out - "mov dword ptr [esi], eax", - "mov dword ptr [esi + 4], edx", - "mov esi, {esi_tmp}", // restore esi - esi_tmp = inout(reg) out => _, - out("eax") _, - inout("edx") old => _, - out("ebx") _, - inout("ecx") new => r, + in("ebx") new.pair.lo, + inout("ecx") new.pair.hi => r, + inout("eax") old.pair.lo => prev_lo, + inout("edx") old.pair.hi => prev_hi, in("edi") dst, // Do not use `preserves_flags` because CMPXCHG8B modifies the ZF flag. options(nostack), ); debug_assert!(r as u8 == 0 || r as u8 == 1, "r={}", r as u8); - r as u8 != 0 + ( + MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type, + r as u8 != 0 + ) } } } @@ -533,11 +444,10 @@ macro_rules! atomic128 { #[inline] unsafe fn atomic_load( src: *const MaybeUninit, - out: *mut MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let (prev_lo, prev_hi); // SAFETY: the caller must guarantee that `src` is valid for both writes and // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. @@ -559,20 +469,17 @@ macro_rules! atomic128 { "xor rbx, rbx", // zeroed rbx // (atomic) load by cmpxchg(0, 0) concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), - // store current value to out - concat!("mov qword ptr [", $rsi, "], rax"), - concat!("mov qword ptr [", $rsi, " + 8], rdx"), "mov rbx, {rbx_tmp}", // restore rbx // set old/new args of cmpxchg16b to 0 (rbx is zeroed after saved to rbx_tmp, to avoid xchg) rbx_tmp = out(reg) _, in("rcx") 0_u64, - inout("rax") 0_u64 => _, - inout("rdx") 0_u64 => _, + inout("rax") 0_u64 => prev_lo, + inout("rdx") 0_u64 => prev_hi, in($rdi) src, - in($rsi) out, // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. options(nostack), ); + MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -580,11 +487,11 @@ macro_rules! 
atomic128 { #[inline] unsafe fn atomic_store( dst: *mut MaybeUninit, - val: *const MaybeUninit, + val: MaybeUninit, _order: Ordering, ) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit128 { $int_type: val }; // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. @@ -602,9 +509,7 @@ macro_rules! atomic128 { // atomic store is always SeqCst. asm!( // rbx is reserved by LLVM - "mov {rbx_tmp}, rbx", - concat!("mov rbx, qword ptr [", $rsi, "]"), - concat!("mov rcx, qword ptr [", $rsi, " + 8]"), + "xchg {rbx_tmp}, rbx", // This is based on the code generated for the first load in DW RMWs by LLVM, // but it is interesting that they generate code that does mixed-sized atomic access. // @@ -617,12 +522,11 @@ macro_rules! atomic128 { concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), "jne 2b", "mov rbx, {rbx_tmp}", // restore rbx - rbx_tmp = out(reg) _, + rbx_tmp = inout(reg) val.pair.lo => _, + in("rcx") val.pair.hi, out("rax") _, - out("rcx") _, out("rdx") _, in($rdi) dst, - in($rsi) val, // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. options(nostack), ); @@ -633,13 +537,12 @@ macro_rules! atomic128 { #[inline] unsafe fn atomic_swap( dst: *mut MaybeUninit, - val: *const MaybeUninit, - out: *mut MaybeUninit, + val: MaybeUninit, _order: Ordering, - ) { + ) -> MaybeUninit { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(val as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let val = MaybeUninit128 { $int_type: val }; + let (mut prev_lo, mut prev_hi); // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. @@ -657,9 +560,7 @@ macro_rules! atomic128 { // atomic swap is always SeqCst. asm!( // rbx is reserved by LLVM - "mov {rbx_tmp}, rbx", - concat!("mov rbx, qword ptr [", $rsi, "]"), - concat!("mov rcx, qword ptr [", $rsi, " + 8]"), + "xchg {rbx_tmp}, rbx", // This is based on the code generated for the first load in DW RMWs by LLVM, // but it is interesting that they generate code that does mixed-sized atomic access. // @@ -671,20 +572,16 @@ macro_rules! atomic128 { "2:", concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), "jne 2b", - // store previous value to out - concat!("mov qword ptr [", $r8, "], rax"), - concat!("mov qword ptr [", $r8, " + 8], rdx"), "mov rbx, {rbx_tmp}", // restore rbx - rbx_tmp = out(reg) _, - out("rax") _, - out("rcx") _, - out("rdx") _, + rbx_tmp = inout(reg) val.pair.lo => _, + in("rcx") val.pair.hi, + out("rax") prev_lo, + out("rdx") prev_hi, in($rdi) dst, - in($rsi) val, - in($r8) out, // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. options(nostack), ); + MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type } } } @@ -692,16 +589,15 @@ macro_rules! 
atomic128 { #[inline] unsafe fn atomic_compare_exchange( dst: *mut MaybeUninit, - old: *const MaybeUninit, - new: *const MaybeUninit, - out: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, _success: Ordering, _failure: Ordering, - ) -> bool { + ) -> (MaybeUninit, bool) { debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); - debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let old = MaybeUninit128 { $int_type: old }; + let new = MaybeUninit128 { $int_type: new }; + let (prev_lo, prev_hi); // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. @@ -720,33 +616,24 @@ macro_rules! atomic128 { // compare_exchange is always SeqCst. asm!( // rbx is reserved by LLVM - "mov {rbx_tmp}, rbx", - concat!("mov rax, qword ptr [", $rsi, "]"), - concat!("mov rsi, qword ptr [", $rsi, " + 8]"), - concat!("mov rbx, qword ptr [", $rdx, "]"), - concat!("mov rcx, qword ptr [", $rdx, " + 8]"), - "mov rdx, rsi", + "xchg {rbx_tmp}, rbx", // (atomic) CAS concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), "sete cl", - // store previous value to out - concat!("mov qword ptr [", $r8, "], rax"), - concat!("mov qword ptr [", $r8, " + 8], rdx"), "mov rbx, {rbx_tmp}", // restore rbx - rbx_tmp = out(reg) _, - out("rax") _, - out("rcx") r, - lateout("rdx") _, - lateout("rsi") _, + rbx_tmp = inout(reg) new.pair.lo => _, + inout("rcx") new.pair.hi => r, + inout("rax") old.pair.lo => prev_lo, + inout("rdx") old.pair.hi => prev_hi, in($rdi) dst, - in($rsi) old, - in($rdx) new, - in($r8) out, // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. options(nostack), ); debug_assert!(r as u8 == 0 || r as u8 == 1, "r={}", r as u8); - r as u8 != 0 + ( + MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type, + r as u8 != 0 + ) } } } @@ -784,14 +671,26 @@ macro_rules! cfg_has_atomic_32 { macro_rules! cfg_no_atomic_32 { ($($tt:tt)*) => {}; } +#[cfg(not(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg8b)))] #[macro_export] macro_rules! cfg_has_atomic_64 { ($($tt:tt)*) => { $($tt)* }; } +#[cfg(not(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg8b)))] #[macro_export] macro_rules! cfg_no_atomic_64 { ($($tt:tt)*) => {}; } +#[cfg(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg8b))] +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => {}; +} +#[cfg(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg8b))] +#[macro_export] +macro_rules! 
cfg_no_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} #[cfg(not(all( target_arch = "x86_64", any(target_feature = "cmpxchg16b", atomic_maybe_uninit_target_feature = "cmpxchg16b"), diff --git a/src/arch_legacy/aarch64.rs b/src/arch_legacy/aarch64.rs new file mode 100644 index 00000000..502ab1a0 --- /dev/null +++ b/src/arch_legacy/aarch64.rs @@ -0,0 +1,894 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// AArch64 +// +// Refs: +// - ARM Compiler armasm User Guide +// https://developer.arm.com/documentation/dui0801/latest +// - Arm A-profile A64 Instruction Set Architecture +// https://developer.arm.com/documentation/ddi0602/latest +// - Arm Architecture Reference Manual for A-profile architecture +// https://developer.arm.com/documentation/ddi0487/latest +// - Arm Architecture Reference Manual Supplement - Armv8, for Armv8-R AArch64 architecture profile +// https://developer.arm.com/documentation/ddi0600/latest +// - portable-atomic https://github.com/taiki-e/portable-atomic +// +// Generated asm: +// - aarch64 https://godbolt.org/z/6TKofhrbb +// - aarch64 msvc https://godbolt.org/z/5GzETjcE7 +// - aarch64 (+lse) https://godbolt.org/z/7jK5vej7b +// - aarch64 msvc (+lse) https://godbolt.org/z/896zWazdW +// - aarch64 (+lse,+lse2) https://godbolt.org/z/66cMd4Ys6 +// - aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/ojbaYn9Kf +// - aarch64 (+rcpc) https://godbolt.org/z/4ahePW8TK +// - aarch64 (+lse2,+lse128) https://godbolt.org/z/joMq5vv1h +// - aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/WdbsccKcz + +use core::{ + arch::asm, + mem::{self, MaybeUninit}, + sync::atomic::Ordering, +}; + +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; + +macro_rules! atomic_rmw { + ($op:ident, $order:ident) => { + atomic_rmw!($op, $order, write = $order) + }; + ($op:ident, $order:ident, write = $write:ident) => { + match $order { + Ordering::Relaxed => $op!("", "", ""), + Ordering::Acquire => $op!("a", "", ""), + Ordering::Release => $op!("", "l", ""), + Ordering::AcqRel => $op!("a", "l", ""), + // In MSVC environments, SeqCst stores/writes needs fences after writes. + // https://reviews.llvm.org/D141748 + #[cfg(target_env = "msvc")] + Ordering::SeqCst if $write == Ordering::SeqCst => $op!("a", "l", "dmb ish"), + // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments. + Ordering::SeqCst => $op!("a", "l", ""), + _ => unreachable!("{:?}", $order), + } + }; +} + +macro_rules! atomic { + ($int_type:ident, $asm_suffix:tt, $val_modifier:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_load { + ($acquire:tt) => { + asm!( + // (atomic) load from src to tmp + concat!("ld", $acquire, "r", $asm_suffix, " {tmp", $val_modifier, "}, [{src}]"), + // store tmp to out + concat!("str", $asm_suffix, " {tmp", $val_modifier, "}, [{out}]"), + src = in(reg) ptr_reg!(src), + out = inout(reg) ptr_reg!(out_ptr) => _, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!(""), + #[cfg(any(target_feature = "rcpc", atomic_maybe_uninit_target_feature = "rcpc"))] + Ordering::Acquire => { + // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC. 
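// Illustrative note (not part of this diff): this match selects the load instruction per
// ordering -- plain "ldr" for Relaxed, "ldapr" for Acquire when FEAT_LRCPC is available,
// otherwise "ldar" for Acquire, and "ldar" for SeqCst (each with a size suffix such as
// "b" or "h" for the narrower types). A hedged summary helper, purely for illustration:
fn aarch64_load_insn(order: core::sync::atomic::Ordering, has_rcpc: bool) -> &'static str {
    use core::sync::atomic::Ordering::{Acquire, Relaxed, SeqCst};
    match order {
        Relaxed => "ldr",
        Acquire if has_rcpc => "ldapr",
        Acquire | SeqCst => "ldar",
        // Release/AcqRel are not valid load orderings, mirroring the surrounding
        // code's unreachable!() arm.
        _ => unreachable!(),
    }
}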
+ asm!( + // (atomic) load from src to tmp + concat!("ldapr", $asm_suffix, " {tmp", $val_modifier, "}, [{src}]"), + // store tmp to out + concat!("str", $asm_suffix, " {tmp", $val_modifier, "}, [{out}]"), + src = in(reg) ptr_reg!(src), + out = inout(reg) ptr_reg!(out_ptr) => _, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ); + } + #[cfg(not(any(target_feature = "rcpc", atomic_maybe_uninit_target_feature = "rcpc")))] + Ordering::Acquire => atomic_load!("a"), + Ordering::SeqCst => atomic_load!("a"), + _ => unreachable!("{:?}", order), + } + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_store { + ($release:tt, $fence:tt) => { + asm!( + // load from val to tmp + concat!("ldr", $asm_suffix, " {tmp", $val_modifier, "}, [{val}]"), + // (atomic) store tmp to dst + concat!("st", $release, "r", $asm_suffix, " {tmp", $val_modifier, "}, [{dst}]"), + $fence, + dst = inout(reg) ptr_reg!(dst) => _, + val = in(reg) ptr_reg!(val), + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_store!("", ""), + Ordering::Release => atomic_store!("l", ""), + // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments. + #[cfg(not(target_env = "msvc"))] + Ordering::SeqCst => atomic_store!("l", ""), + // In MSVC environments, SeqCst stores/writes needs fences after writes. + // https://reviews.llvm.org/D141748 + #[cfg(target_env = "msvc")] + Ordering::SeqCst => atomic_store!("l", "dmb ish"), + _ => unreachable!("{:?}", order), + } + } + } + } + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + #[cfg(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse"))] + macro_rules! swap { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + // load from val to tmp + concat!("ldr", $asm_suffix, " {tmp", $val_modifier, "}, [{val}]"), + // (atomic) swap + // Refs: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/SWPA--SWPAL--SWP--SWPL--SWPAL--SWP--SWPL + concat!("swp", $acquire, $release, $asm_suffix, " {tmp", $val_modifier, "}, {tmp", $val_modifier, "}, [{dst}]"), + $fence, + // store tmp to out + concat!("str", $asm_suffix, " {tmp", $val_modifier, "}, [{out}]"), + dst = inout(reg) ptr_reg!(dst) => _, + val = in(reg) ptr_reg!(val), + out = inout(reg) ptr_reg!(out_ptr) => _, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + #[cfg(not(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse")))] + macro_rules! 
swap { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + // load from val to val_tmp + concat!("ldr", $asm_suffix, " {val_tmp", $val_modifier, "}, [{val}]"), + // (atomic) swap (LL/SC loop) + "2:", + // load from dst to out_tmp + concat!("ld", $acquire, "xr", $asm_suffix, " {out_tmp", $val_modifier, "}, [{dst}]"), + // try to store val to dst + concat!("st", $release, "xr", $asm_suffix, " {r:w}, {val_tmp", $val_modifier, "}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", + $fence, + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp", $val_modifier, "}, [{out}]"), + dst = inout(reg) ptr_reg!(dst) => _, + val = in(reg) ptr_reg!(val), + val_tmp = out(reg) _, + out = inout(reg) ptr_reg!(out_ptr) => _, + out_tmp = out(reg) _, + r = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(swap, order); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i32; + #[cfg(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse"))] + macro_rules! cmpxchg { + ($acquire:tt, $release:tt, $fence:tt) => {{ + asm!( + // load from old/new to old_tmp/new_tmp + concat!("ldr", $asm_suffix, " {old_tmp", $val_modifier, "}, [{old}]"), + concat!("ldr", $asm_suffix, " {new_tmp", $val_modifier, "}, [{new}]"), + // cas writes the current value to the first register, + // so copy the `old`'s value for later comparison. + concat!("mov {out_tmp", $val_modifier, "}, {old_tmp", $val_modifier, "}"), + // (atomic) CAS + // Refs: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/CASA--CASAL--CAS--CASL--CASAL--CAS--CASL + concat!("cas", $acquire, $release, $asm_suffix, " {out_tmp", $val_modifier, "}, {new_tmp", $val_modifier, "}, [{dst}]"), + $fence, + concat!("cmp {out_tmp", $val_modifier, "}, {old_tmp", $val_modifier, "}"), + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp", $val_modifier, "}, [{out}]"), + "cset {r:w}, eq", + dst = inout(reg) ptr_reg!(dst) => _, + old = in(reg) ptr_reg!(old), + old_tmp = out(reg) _, + new = in(reg) ptr_reg!(new), + new_tmp = out(reg) _, + out = inout(reg) ptr_reg!(out_ptr) => _, + out_tmp = out(reg) _, + r = lateout(reg) r, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + (out, r != 0) + }}; + } + #[cfg(not(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse")))] + macro_rules! 
cmpxchg { + ($acquire:tt, $release:tt, $fence:tt) => {{ + asm!( + // load from old/new to old_tmp/new_tmp + concat!("ldr", $asm_suffix, " {new_tmp", $val_modifier, "}, [{new}]"), + concat!("ldr", $asm_suffix, " {old_tmp", $val_modifier, "}, [{old}]"), + // (atomic) CAS (LL/SC loop) + "2:", + concat!("ld", $acquire, "xr", $asm_suffix, " {out_tmp", $val_modifier, "}, [{dst}]"), + concat!("cmp {out_tmp", $val_modifier, "}, {old_tmp", $val_modifier, "}"), + "b.ne 3f", // jump if compare failed + concat!("st", $release, "xr", $asm_suffix, " {r:w}, {new_tmp", $val_modifier, "}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", // continue loop if store failed + $fence, + "b 4f", + "3:", + "mov {r:w}, #1", // mark as failed + "clrex", + "4:", + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp", $val_modifier, "}, [{out}]"), + dst = inout(reg) ptr_reg!(dst) => _, + old = in(reg) ptr_reg!(old), + old_tmp = out(reg) _, + new = in(reg) ptr_reg!(new), + new_tmp = out(reg) _, + out = inout(reg) ptr_reg!(out_ptr) => _, + out_tmp = out(reg) _, + r = lateout(reg) r, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + }}; + } + atomic_rmw!(cmpxchg, order, write = success) + } + } + #[cfg(not(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse")))] + #[inline] + unsafe fn atomic_compare_exchange_weak( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let r: i32; + macro_rules! cmpxchg_weak { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + // load from old/new to old_tmp/new_tmp + concat!("ldr", $asm_suffix, " {new_tmp", $val_modifier, "}, [{new}]"), + concat!("ldr", $asm_suffix, " {old_tmp", $val_modifier, "}, [{old}]"), + // (atomic) CAS + concat!("ld", $acquire, "xr", $asm_suffix, " {out_tmp", $val_modifier, "}, [{dst}]"), + concat!("cmp {out_tmp", $val_modifier, "}, {old_tmp", $val_modifier, "}"), + "b.ne 3f", + concat!("st", $release, "xr", $asm_suffix, " {r:w}, {new_tmp", $val_modifier, "}, [{dst}]"), + // TODO: only emit when the above sc succeed + // // 0 if the store was successful, 1 if no store was performed + // "cbnz {r:w}, 4f", + $fence, + "b 4f", + "3:", + "mov {r:w}, #1", + "clrex", + "4:", + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp", $val_modifier, "}, [{out}]"), + dst = inout(reg) ptr_reg!(dst) => _, + old = in(reg) ptr_reg!(old), + old_tmp = out(reg) _, + new = in(reg) ptr_reg!(new), + new_tmp = out(reg) _, + out = inout(reg) ptr_reg!(out_ptr) => _, + out_tmp = out(reg) _, + r = lateout(reg) r, + // Do not use `preserves_flags` because CMP modifies the condition flags. 
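// Illustrative note (not part of this diff) on atomic_compare_exchange_weak above: with
// LL/SC, the store-exclusive can fail even though the comparison succeeded (lost
// reservation, interrupt, ...), so the weak variant is allowed to report a spurious
// failure rather than loop. Callers retry, e.g. (hedged sketch using plain core atomics,
// not this crate's API):
//
// fn increment(v: &core::sync::atomic::AtomicU32) {
//     use core::sync::atomic::Ordering::{Relaxed, SeqCst};
//     let mut cur = v.load(Relaxed);
//     // A spurious failure simply hands back the observed value and we try again.
//     while let Err(now) = v.compare_exchange_weak(cur, cur.wrapping_add(1), SeqCst, Relaxed) {
//         cur = now;
//     }
// }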
+ options(nostack), + ) + }; + } + atomic_rmw!(cmpxchg_weak, order, write = success); + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + } + } + } + }; +} + +atomic!(i8, "b", ":w"); +atomic!(u8, "b", ":w"); +atomic!(i16, "h", ":w"); +atomic!(u16, "h", ":w"); +atomic!(i32, "", ":w"); +atomic!(u32, "", ":w"); +atomic!(i64, "", ""); +atomic!(u64, "", ""); +#[cfg(target_pointer_width = "32")] +atomic!(isize, "", ":w"); +#[cfg(target_pointer_width = "32")] +atomic!(usize, "", ":w"); +#[cfg(target_pointer_width = "64")] +atomic!(isize, "", ""); +#[cfg(target_pointer_width = "64")] +atomic!(usize, "", ""); + +// There are a few ways to implement 128-bit atomic operations in AArch64. +// +// - LDXP/STXP loop (DW LL/SC) +// - CASP (DWCAS) added as FEAT_LSE (mandatory from armv8.1-a) +// - LDP/STP (DW load/store) if FEAT_LSE2 (optional from armv8.2-a, mandatory from armv8.4-a) is available +// - LDIAPP/STILP (DW acquire-load/release-store) added as FEAT_LRCPC3 (optional from armv8.9-a/armv9.4-a) (if FEAT_LSE2 is also available) +// - LDCLRP/LDSETP/SWPP (DW RMW) added as FEAT_LSE128 (optional from armv9.4-a) +// +// If FEAT_LSE is available at compile-time, we use CASP for load/CAS. Otherwise, use LDXP/STXP loop. +// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store. +// If FEAT_LSE128 is available at compile-time, we use SWPP for swap/{release,seqcst}-store. +// If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store. +// +// Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2. +// +// Refs: +// - LDP: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDP +// - LDXP: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDXP +// - LDAXP: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAXP +// - STP: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/STP +// - STXP: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/STXP +// - STLXP: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/STLXP +// +// Note: Load-Exclusive pair (by itself) does not guarantee atomicity; to complete an atomic +// operation (even load/store), a corresponding Store-Exclusive pair must succeed. +// See Arm Architecture Reference Manual for A-profile architecture +// Section B2.2.1 "Requirements for single-copy atomicity", and +// Section B2.9 "Synchronization and semaphores" for more. +macro_rules! atomic128 { + ($int_type:ident) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + #[cfg(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2"))] + // SAFETY: the caller must guarantee that `dst` is valid for reads, + // 16-byte aligned, that there are no concurrent non-atomic operations. + // the above cfg guarantee that the CPU supports FEAT_LSE2. + unsafe { + macro_rules! 
atomic_load_relaxed { + ($acquire:tt) => { + asm!( + // (atomic) load from src to tmp pair + "ldp {tmp_lo}, {tmp_hi}, [{src}]", + $acquire, + // store tmp pair to out + "stp {tmp_lo}, {tmp_hi}, [{out}]", + src = in(reg) ptr_reg!(src), + out = in(reg) ptr_reg!(out_ptr), + tmp_hi = out(reg) _, + tmp_lo = out(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load_relaxed!(""), + #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))] + Ordering::Acquire => { + // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3. + // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-Pair-of-registers- + asm!( + // (atomic) load from src to tmp pair + "ldiapp {tmp_lo}, {tmp_hi}, [{src}]", + // store tmp pair to out + "stp {tmp_lo}, {tmp_hi}, [{out}]", + src = in(reg) ptr_reg!(src), + out = in(reg) ptr_reg!(out_ptr), + tmp_hi = out(reg) _, + tmp_lo = out(reg) _, + options(nostack, preserves_flags), + ); + } + #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))] + Ordering::Acquire => atomic_load_relaxed!("dmb ishld"), + Ordering::SeqCst => { + asm!( + // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp. + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891 + "ldar {tmp}, [{src}]", + // (atomic) load from src to tmp pair + "ldp {tmp_lo}, {tmp_hi}, [{src}]", + "dmb ishld", + // store tmp pair to out + "stp {tmp_lo}, {tmp_hi}, [{out}]", + src = in(reg) ptr_reg!(src), + out = in(reg) ptr_reg!(out_ptr), + tmp_hi = out(reg) _, + tmp_lo = out(reg) _, + tmp = out(reg) _, + options(nostack, preserves_flags), + ); + }, + _ => unreachable!("{:?}", order), + } + } + #[cfg(not(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2")))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + #[cfg(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse"))] + macro_rules! atomic_load { + ($acquire:tt, $release:tt) => { + asm!( + // (atomic) load (CAS) + // Refs: + // - https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/CASPA--CASPAL--CASP--CASPL--CASPAL--CASP--CASPL + // - https://github.com/taiki-e/portable-atomic/pull/20 + concat!("casp", $acquire, $release, " x2, x3, x2, x3, [{src}]"), + // store out pair to out + "stp x2, x3, [{out}]", + src = in(reg) ptr_reg!(src), + out = in(reg) ptr_reg!(out_ptr), + // must be allocated to even/odd register pair + inout("x2") 0_u64 => _, // out_lo + inout("x3") 0_u64 => _, // out_lo + options(nostack, preserves_flags), + ) + }; + } + #[cfg(not(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse")))] + macro_rules! 
atomic_load { + ($acquire:tt, $release:tt) => { + asm!( + // (atomic) load from src to tmp pair + "2:", + // load from src to tmp pair + concat!("ld", $acquire, "xp {tmp_lo}, {tmp_hi}, [{src}]"), + // store tmp pair to src + concat!("st", $release, "xp {r:w}, {tmp_lo}, {tmp_hi}, [{src}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", + // store tmp pair to out + "stp {tmp_lo}, {tmp_hi}, [{out}]", + src = in(reg) ptr_reg!(src), + out = in(reg) ptr_reg!(out_ptr), + tmp_hi = out(reg) _, + tmp_lo = out(reg) _, + r = out(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!("", ""), + Ordering::Acquire => atomic_load!("a", ""), + Ordering::SeqCst => atomic_load!("a", "l"), + _ => unreachable!("{:?}", order), + } + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + #[cfg(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2"))] + // SAFETY: the caller must guarantee that `dst` is valid for writes, + // 16-byte aligned, that there are no concurrent non-atomic operations. + // the above cfg guarantee that the CPU supports FEAT_LSE2. + unsafe { + macro_rules! atomic_store { + ($acquire:tt, $release:tt) => { + asm!( + // load from val to val pair + "ldp {val_lo}, {val_hi}, [{val}]", + // (atomic) store val pair to dst + $release, + "stp {val_lo}, {val_hi}, [{dst}]", + $acquire, + dst = in(reg) ptr_reg!(dst), + val = in(reg) ptr_reg!(val), + val_hi = out(reg) _, + val_lo = out(reg) _, + options(nostack, preserves_flags), + ) + }; + } + // Use swpp if stp requires fences. + // https://reviews.llvm.org/D143506 + #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))] + macro_rules! atomic_store_swpp { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + // load from val to val pair + "ldp {val_lo}, {val_hi}, [{val}]", + // (atomic) swap + concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), + $fence, + dst = in(reg) ptr_reg!(dst), + val = in(reg) ptr_reg!(val), + val_hi = out(reg) _, + val_lo = out(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_store!("", ""), + #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))] + Ordering::Release => { + // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3. 
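// Illustrative note (not part of this diff): in this FEAT_LSE2 path the 16-byte stp is a
// plain store, so ordering comes from the fences picked per arm -- none for Relaxed, a
// leading "dmb ish" for Release (unless STILP or SWPP can be used instead), and "dmb ish"
// on both sides of the stp for SeqCst (unless SWPP can be used). A hedged summary helper,
// purely for illustration:
fn lse2_stp_fences(order: core::sync::atomic::Ordering) -> (&'static str, &'static str) {
    use core::sync::atomic::Ordering::{Relaxed, Release, SeqCst};
    // Returns (fence emitted before the stp, fence emitted after the stp).
    match order {
        Relaxed => ("", ""),
        Release => ("dmb ish", ""),
        SeqCst => ("dmb ish", "dmb ish"),
        _ => unreachable!(),
    }
}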
+ // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/STILP--Store-Release-ordered-Pair-of-registers- + asm!( + // load from val to val pair + "ldp {val_lo}, {val_hi}, [{val}]", + // (atomic) store val pair to dst + "stilp {val_lo}, {val_hi}, [{dst}]", + dst = in(reg) ptr_reg!(dst), + val = in(reg) ptr_reg!(val), + val_hi = out(reg) _, + val_lo = out(reg) _, + options(nostack, preserves_flags), + ); + } + #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))] + #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))] + Ordering::Release => atomic_rmw!(atomic_store_swpp, order), + #[cfg(not(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128")))] + #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))] + Ordering::Release => atomic_store!("", "dmb ish"), + #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))] + Ordering::SeqCst => atomic_rmw!(atomic_store_swpp, order), + #[cfg(not(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128")))] + Ordering::SeqCst => atomic_store!("dmb ish", "dmb ish"), + _ => unreachable!("{:?}", order), + } + } + #[cfg(not(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2")))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! store { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + // load from val to val pair + "ldp {val_lo}, {val_hi}, [{val}]", + // (atomic) store val pair to dst (LL/SC loop) + "2:", + // load from dst to xzr/tmp pair + concat!("ld", $acquire, "xp xzr, {tmp}, [{dst}]"), + // try to store val pair to dst + concat!("st", $release, "xp {tmp:w}, {val_lo}, {val_hi}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {tmp:w}, 2b", + $fence, + dst = inout(reg) ptr_reg!(dst) => _, + val = in(reg) ptr_reg!(val), + val_hi = out(reg) _, + val_lo = out(reg) _, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(store, order); + } + } + } + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))] + macro_rules! swap { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + // load from val to val pair + "ldp {val_lo}, {val_hi}, [{val}]", + // (atomic) swap + concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), + $fence, + // store out pair to out + "stp {val_lo}, {val_hi}, [{out}]", + dst = in(reg) ptr_reg!(dst), + val = in(reg) ptr_reg!(val), + out = in(reg) ptr_reg!(out_ptr), + val_hi = out(reg) _, + val_lo = out(reg) _, + options(nostack, preserves_flags), + ) + }; + } + #[cfg(not(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128")))] + macro_rules! 
swap { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + // load from val to val pair + "ldp {val_lo}, {val_hi}, [{val}]", + // (atomic) swap (LL/SC loop) + "2:", + // load from dst to out pair + concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{dst}]"), + // try to store val pair to dst + concat!("st", $release, "xp {r:w}, {val_lo}, {val_hi}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", + $fence, + // store out pair to out + "stp {out_lo}, {out_hi}, [{out}]", + dst = inout(reg) ptr_reg!(dst) => _, + val = in(reg) ptr_reg!(val), + out = inout(reg) ptr_reg!(out_ptr) => _, + val_hi = out(reg) _, + val_lo = out(reg) _, + out_hi = out(reg) _, + out_lo = out(reg) _, + r = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(swap, order); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i32; + #[cfg(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse"))] + macro_rules! cmpxchg { + ($acquire:tt, $release:tt, $fence:tt) => {{ + asm!( + // load from old/new to old/new pairs + "ldp {old_lo}, {old_hi}, [{old}]", + "ldp x4, x5, [{new}]", + // casp writes the current value to the first register pair, + // so copy the `old`'s value for later comparison. + "mov x8, {old_lo}", + "mov x9, {old_hi}", + // (atomic) CAS + // Refs: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/CASPA--CASPAL--CASP--CASPL--CASPAL--CASP--CASPL + concat!("casp", $acquire, $release, " x8, x9, x4, x5, [{dst}]"), + $fence, + // compare old pair and out pair + "cmp x8, {old_lo}", + "ccmp x9, {old_hi}, #0, eq", + "cset {r:w}, eq", + // store out pair to out + "stp x8, x9, [{out}]", + dst = in(reg) ptr_reg!(dst), + old = in(reg) ptr_reg!(old), + new = in(reg) ptr_reg!(new), + out = inout(reg) ptr_reg!(out_ptr) => _, + old_lo = out(reg) _, + old_hi = out(reg) _, + r = lateout(reg) r, + // new pair - must be allocated to even/odd register pair + out("x4") _, // new_lo + out("x5") _, // new_hi + // out pair - must be allocated to even/odd register pair + out("x8") _, // out_lo + out("x9") _, // out_hi + // Do not use `preserves_flags` because CMP and CCMP modify the condition flags. + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + (out, r != 0) + }}; + } + #[cfg(not(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse")))] + macro_rules! 
cmpxchg { + ($acquire:tt, $release:tt, $fence:tt) => {{ + asm!( + // load from old/new to old/new pair + "ldp {new_lo}, {new_hi}, [{new}]", + "ldp {old_lo}, {old_hi}, [{old}]", + // (atomic) CAS (LL/SC loop) + "2:", + concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{dst}]"), + "cmp {out_lo}, {old_lo}", + "cset {r:w}, ne", + "cmp {out_hi}, {old_hi}", + "cinc {r:w}, {r:w}, ne", + "cbz {r:w}, 3f", // jump if compare succeed + concat!("st", $release, "xp {r:w}, {out_lo}, {out_hi}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", // continue loop if store failed + "mov {r:w}, #1", // mark as failed + "b 4f", + "3:", + concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", // continue loop if store failed + "4:", + $fence, + // store out_tmp to out + "stp {out_lo}, {out_hi}, [{out}]", + dst = inout(reg) ptr_reg!(dst) => _, + old = in(reg) ptr_reg!(old), + old_hi = out(reg) _, + old_lo = out(reg) _, + new = in(reg) ptr_reg!(new), + new_hi = out(reg) _, + new_lo = out(reg) _, + out = inout(reg) ptr_reg!(out_ptr) => _, + out_hi = out(reg) _, + out_lo = out(reg) _, + r = lateout(reg) r, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + }}; + } + atomic_rmw!(cmpxchg, order, write = success) + } + } + } + }; +} + +atomic128!(i128); +atomic128!(u128); + +#[macro_export] +macro_rules! cfg_has_atomic_8 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_8 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_16 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_16 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_32 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_32 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_128 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_128 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! 
cfg_no_atomic_cas { + ($($tt:tt)*) => {}; +} diff --git a/src/arch_legacy/arm.rs b/src/arch_legacy/arm.rs new file mode 100644 index 00000000..5733c9d5 --- /dev/null +++ b/src/arch_legacy/arm.rs @@ -0,0 +1,1210 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// ARMv6 and ARMv7 +// +// Refs: +// - ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition +// https://developer.arm.com/documentation/ddi0406/cb +// - ARMv6 Differences +// https://developer.arm.com/documentation/ddi0406/cb/Appendixes/ARMv6-Differences?lang=en +// +// Generated asm: +// - armv7-a https://godbolt.org/z/P93x9TjWs +// - armv7-r https://godbolt.org/z/1z9q9vTcd +// - armv7-m https://godbolt.org/z/WozEfbMbx +// - armv6 https://godbolt.org/z/T5M337jYK +// - armv6-m https://godbolt.org/z/q88qPah4W + +use core::{ + mem::{self, MaybeUninit}, + sync::atomic::Ordering, +}; + +#[cfg(any( + any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"), + not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")), +))] +use crate::raw::{AtomicCompareExchange, AtomicSwap}; +use crate::raw::{AtomicLoad, AtomicStore}; + +#[cfg(any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"))] +#[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] +macro_rules! dmb { + () => { + "dmb ish" + }; +} +// Only a full system barrier exists in the M-class architectures. +#[cfg(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass"))] +macro_rules! dmb { + () => { + "dmb sy" + }; +} +// ARMv6 does not support `dmb`, so use use special instruction equivalent to a DMB. +// +// Refs: +// - https://reviews.llvm.org/D5386 +// - https://developer.arm.com/documentation/ddi0360/e/control-coprocessor-cp15/register-descriptions/c7--cache-operations-register?lang=en +#[cfg(not(all( + any(target_os = "linux", target_os = "android"), + not(atomic_maybe_uninit_use_cp15_barrier), +)))] +#[cfg(not(any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7")))] +#[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] +macro_rules! dmb { + () => { + "mcr p15, #0, r0, c7, c10, #5" + }; +} +// We prefer __kuser_memory_barrier over cp15_barrier because cp15_barrier is +// trapped and emulated by default on Linux/Android with ARMv8+ (or ARMv7+?). +// https://github.com/rust-lang/rust/issues/60605 +#[cfg(all( + any(target_os = "linux", target_os = "android"), + not(atomic_maybe_uninit_use_cp15_barrier), +))] +#[cfg(not(any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7")))] +#[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] +macro_rules! dmb { + () => { + "blx r0" + }; +} + +#[cfg(any( + any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"), + not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")), +))] +#[cfg(any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"))] +macro_rules! clrex { + () => { + "clrex" + }; +} +#[cfg(any( + any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"), + not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")), +))] +#[cfg(not(any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7")))] +macro_rules! clrex { + () => { + "" + }; +} + +macro_rules! 
asm_no_dmb { + (options($($options:tt)*), $($asm:tt)*) => { + core::arch::asm!( + $($asm)* + options($($options)*), + ) + }; +} +#[cfg(any( + target_feature = "v7", + atomic_maybe_uninit_target_feature = "v7", + target_feature = "mclass", + atomic_maybe_uninit_target_feature = "mclass", +))] +macro_rules! asm_use_dmb { + (options($($options:tt)*), $($asm:tt)*) => { + core::arch::asm!( + $($asm)* + options($($options)*), + ) + }; +} +#[cfg(not(all( + any(target_os = "linux", target_os = "android"), + not(atomic_maybe_uninit_use_cp15_barrier), +)))] +#[cfg(not(any( + target_feature = "v7", + atomic_maybe_uninit_target_feature = "v7", + target_feature = "mclass", + atomic_maybe_uninit_target_feature = "mclass", +)))] +macro_rules! asm_use_dmb { + (options($($options:tt)*), $($asm:tt)*) => { + // In this case, dmb! calls `mcr p15, 0, , c7, c10, 5`, and the value in the Rd register should be zero (SBZ). + core::arch::asm!( + $($asm)* + inout("r0") 0_u32 => _, + options($($options)*), + ) + }; +} +#[cfg(all( + any(target_os = "linux", target_os = "android"), + not(atomic_maybe_uninit_use_cp15_barrier), +))] +#[cfg(not(any( + target_feature = "v7", + atomic_maybe_uninit_target_feature = "v7", + target_feature = "mclass", + atomic_maybe_uninit_target_feature = "mclass", +)))] +macro_rules! asm_use_dmb { + (options($($options:tt)*), $($asm:tt)*) => { + // In this case, dmb! calls __kuser_memory_barrier. + core::arch::asm!( + $($asm)* + // __kuser_memory_barrier (see also arm_linux.rs) + // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt + inout("r0") 0xFFFF0FA0_usize => _, + out("lr") _, + options($($options)*), + ) + }; +} + +macro_rules! atomic { + ($int_type:ident, $asm_suffix:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_load { + ($asm:ident, $acquire:expr) => { + $asm!( + options(nostack, preserves_flags), + // (atomic) load from src to tmp + concat!("ldr", $asm_suffix, " {tmp}, [{src}]"), + $acquire, // acquire fence + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + src = in(reg) src, + out = inout(reg) out_ptr => _, + tmp = lateout(reg) _, + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!(asm_no_dmb, ""), + // Acquire and SeqCst loads are equivalent. + Ordering::Acquire | Ordering::SeqCst => atomic_load!(asm_use_dmb, dmb!()), + _ => unreachable!("{:?}", order), + } + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! 
atomic_store { + ($asm:ident, $acquire:expr, $release:expr) => { + $asm!( + options(nostack, preserves_flags), + // load from val to tmp + concat!("ldr", $asm_suffix, " {tmp}, [{val}]"), + // (atomic) store tmp to dst + $release, // release fence + concat!("str", $asm_suffix, " {tmp}, [{dst}]"), + $acquire, // acquire fence + dst = inout(reg) dst => _, + val = in(reg) val, + tmp = lateout(reg) _, + ) + }; + } + match order { + Ordering::Relaxed => atomic_store!(asm_no_dmb, "", ""), + Ordering::Release => atomic_store!(asm_use_dmb, "", dmb!()), + Ordering::SeqCst => atomic_store!(asm_use_dmb, dmb!(), dmb!()), + _ => unreachable!("{:?}", order), + } + } + } + } + #[cfg(any( + any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"), + not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")), + ))] + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_swap { + ($asm:ident, $acquire:expr, $release:expr) => { + $asm!( + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), + // load from val (ptr) to val (val) + concat!("ldr", $asm_suffix, " {val}, [{val}]"), + // (atomic) swap (LL/SC loop) + $release, // release fence + "2:", + // load from dst to tmp + concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), + // try to store val to dst + concat!("strex", $asm_suffix, " {r}, {val}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, 0x0", + "bne 2b", + $acquire, // acquire fence + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + dst = in(reg) dst, + val = inout(reg) val => _, + out = in(reg) out_ptr, + r = out(reg) _, + tmp = out(reg) _, + ) + }; + } + match order { + Ordering::Relaxed => atomic_swap!(asm_no_dmb, "", ""), + Ordering::Acquire => atomic_swap!(asm_use_dmb, dmb!(), ""), + Ordering::Release => atomic_swap!(asm_use_dmb, "", dmb!()), + // AcqRel and SeqCst swaps are equivalent. + Ordering::AcqRel | Ordering::SeqCst => { + atomic_swap!(asm_use_dmb, dmb!(), dmb!()); + } + _ => unreachable!("{:?}", order), + } + } + out + } + } + #[rustfmt::skip] + #[cfg(any( + any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"), + not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")), + ))] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + use core::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst}; + let mut r: i32; + macro_rules! cmpxchg_store_relaxed { + ($asm:ident, $acquire_success:expr, $acquire_failure:expr) => { + $asm!( + // Do not use `preserves_flags` because CMP modifies the condition flags. 
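+ // This variant serves success orderings without release semantics (Relaxed/Acquire): the STREX needs
+ // no preceding release fence, and $acquire_success/$acquire_failure supply the acquire fences for the
+ // success/failure paths. Release/AcqRel/SeqCst successes use cmpxchg_release/cmpxchg_acqrel below.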
+ options(nostack), + // load from old/new (ptr) to old/new (val) + concat!("ldr", $asm_suffix, " {old}, [{old}]"), + concat!("ldr", $asm_suffix, " {new}, [{new}]"), + // (atomic) CAS (LL/SC loop) + "2:", + concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), + "cmp {tmp}, {old}", + "bne 3f", // jump if compare failed + concat!("strex", $asm_suffix, " {r}, {new}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "bne 2b", // continue loop if store failed + $acquire_success, + "b 4f", + "3:", + // compare failed, set r to 1 + "mov {r}, #1", + clrex!(), + $acquire_failure, + "4:", + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + dst = in(reg) dst, + r = out(reg) r, + old = inout(reg) old => _, + new = inout(reg) new => _, + out = in(reg) out_ptr, + tmp = out(reg) _, + ) + }; + } + macro_rules! cmpxchg_release { + ($acquire_failure:expr) => { + asm_use_dmb!( + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), + // load from old/new (ptr) to old/new (val) + concat!("ldr", $asm_suffix, " {old}, [{old}]"), + concat!("ldr", $asm_suffix, " {new}, [{new}]"), + // (atomic) CAS (LL/SC loop) + concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), + "cmp {tmp}, {old}", + "bne 3f", // jump if compare failed + dmb!(), // release + "2:", + concat!("strex", $asm_suffix, " {r}, {new}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 4f", // jump if store succeed + concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), + "cmp {tmp}, {old}", + "beq 2b", // continue loop if compare succeed + "3:", + // compare failed, set r to 1 + "mov {r}, #1", + clrex!(), + $acquire_failure, + "4:", + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + dst = in(reg) dst, + r = out(reg) r, + old = inout(reg) old => _, + new = inout(reg) new => _, + out = in(reg) out_ptr, + tmp = out(reg) _, + ) + }; + } + macro_rules! cmpxchg_acqrel { + ($acquire_failure:expr) => { + asm_use_dmb!( + // Do not use `preserves_flags` because CMP modifies the condition flags. 
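+ // Same shape as cmpxchg_release above (the release dmb runs only once the first compare succeeds),
+ // plus a trailing dmb on the success path (label 4) that provides the acquire half of AcqRel/SeqCst.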
+ options(nostack), + // load from old/new (ptr) to old/new (val) + concat!("ldr", $asm_suffix, " {old}, [{old}]"), + concat!("ldr", $asm_suffix, " {new}, [{new}]"), + // (atomic) CAS (LL/SC loop) + concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), + "cmp {tmp}, {old}", + "bne 3f", // jump if compare failed + dmb!(), // release + "2:", + concat!("strex", $asm_suffix, " {r}, {new}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 4f", // jump if store succeed + concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), + "cmp {tmp}, {old}", + "beq 2b", // continue loop if compare succeed + "3:", + // compare failed, set r to 1 + "mov {r}, #1", + clrex!(), + $acquire_failure, + "b 5f", + "4:", // store succeed + dmb!(), // acquire_success + "5:", + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + dst = in(reg) dst, + r = out(reg) r, + old = inout(reg) old => _, + new = inout(reg) new => _, + out = in(reg) out_ptr, + tmp = out(reg) _, + ) + }; + } + match (success, failure) { + (Relaxed, Relaxed) => cmpxchg_store_relaxed!(asm_no_dmb, "", ""), + (Relaxed, Acquire | SeqCst) => { + cmpxchg_store_relaxed!(asm_use_dmb, "", dmb!()); + } + (Acquire, Relaxed) => cmpxchg_store_relaxed!(asm_use_dmb, dmb!(), ""), + (Acquire, Acquire | SeqCst) => { + cmpxchg_store_relaxed!(asm_use_dmb, dmb!(), dmb!()); + } + (Release, Relaxed) => cmpxchg_release!(""), + (Release, Acquire | SeqCst) => cmpxchg_release!(dmb!()), + // AcqRel and SeqCst compare_exchange are equivalent. + (AcqRel | SeqCst, Relaxed) => cmpxchg_acqrel!(""), + (AcqRel | SeqCst, _) => cmpxchg_acqrel!(dmb!()), + _ => unreachable!("{:?}, {:?}", success, failure), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + } + } + #[inline] + unsafe fn atomic_compare_exchange_weak( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + use core::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst}; + let mut r: i32; + macro_rules! cmpxchg_weak { + ($asm:ident, $acquire:expr, $release:expr) => { + $asm!( + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), + // load from old/new (ptr) to old/new (val) + concat!("ldr", $asm_suffix, " {old}, [{old}]"), + concat!("ldr", $asm_suffix, " {new}, [{new}]"), + concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), + "cmp {tmp}, {old}", + "bne 3f", // jump if compare failed + $release, + concat!("strex", $asm_suffix, " {r}, {new}, [{dst}]"), + "b 4f", + "3:", + // compare failed, set r to 1 + "mov {r}, #1", + clrex!(), + "4:", + $acquire, + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + dst = in(reg) dst, + r = out(reg) r, + old = inout(reg) old => _, + new = inout(reg) new => _, + out = in(reg) out_ptr, + tmp = out(reg) _, + ) + }; + } + macro_rules! cmpxchg_weak_fail_load_relaxed { + ($release:expr) => { + asm_use_dmb!( + // Do not use `preserves_flags` because CMP modifies the condition flags. 
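+ // Weak variant for a Relaxed failure ordering: only the successful-store path (label 4) runs the
+ // acquire dmb; compare failures and spurious STREX failures skip it. $release is the optional
+ // release fence before the store.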
+ options(nostack), + // load from old/new (ptr) to old/new (val) + concat!("ldr", $asm_suffix, " {old}, [{old}]"), + concat!("ldr", $asm_suffix, " {new}, [{new}]"), + concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), + "cmp {tmp}, {old}", + "bne 3f", // jump if compare failed + $release, + concat!("strex", $asm_suffix, " {r}, {new}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 4f", // jump if store succeed + "b 5f", // jump (store failed) + "3:", + // compare failed, set r to 1 + "mov {r}, #1", + clrex!(), + "b 5f", + "4:", // store succeed + dmb!(), // acquire_success + "5:", + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + dst = in(reg) dst, + r = out(reg) r, + old = inout(reg) old => _, + new = inout(reg) new => _, + out = in(reg) out_ptr, + tmp = out(reg) _, + ) + }; + } + macro_rules! cmpxchg_weak_success_load_relaxed { + ($release:expr) => { + asm_use_dmb!( + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), + // load from old/new (ptr) to old/new (val) + concat!("ldr", $asm_suffix, " {old}, [{old}]"), + concat!("ldr", $asm_suffix, " {new}, [{new}]"), + concat!("ldrex", $asm_suffix, " {tmp}, [{dst}]"), + "cmp {tmp}, {old}", + "bne 3f", // jump if compare failed + $release, + concat!("strex", $asm_suffix, " {r}, {new}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 5f", // jump if store succeed + "b 4f", // jump (store failed) + "3:", + // compare failed, set r to 1 + "mov {r}, #1", + clrex!(), + "4:", // compare or store failed + dmb!(), // acquire_failure + "5:", + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + dst = in(reg) dst, + r = out(reg) r, + old = inout(reg) old => _, + new = inout(reg) new => _, + out = in(reg) out_ptr, + tmp = out(reg) _, + ) + }; + } + match (success, failure) { + (Relaxed, Relaxed) => cmpxchg_weak!(asm_no_dmb, "", ""), + (Relaxed, Acquire | SeqCst) => cmpxchg_weak_success_load_relaxed!(""), + (Acquire, Relaxed) => cmpxchg_weak_fail_load_relaxed!(""), + (Acquire, Acquire | SeqCst) => cmpxchg_weak!(asm_use_dmb, dmb!(), ""), + (Release, Relaxed) => cmpxchg_weak!(asm_use_dmb, "", dmb!()), + (Release, Acquire | SeqCst) => cmpxchg_weak_success_load_relaxed!(dmb!()), + // AcqRel and SeqCst compare_exchange_weak are equivalent. + (AcqRel | SeqCst, Relaxed) => cmpxchg_weak_fail_load_relaxed!(dmb!()), + (AcqRel | SeqCst, _) => cmpxchg_weak!(asm_use_dmb, dmb!(), dmb!()), + _ => unreachable!("{:?}, {:?}", success, failure), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + } + } + } + }; +} + +atomic!(i8, "b"); +atomic!(u8, "b"); +atomic!(i16, "h"); +atomic!(u16, "h"); +atomic!(i32, ""); +atomic!(u32, ""); +atomic!(isize, ""); +atomic!(usize, ""); + +// Refs: +// - https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/LDREXD +// - https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/STREXD +#[rustfmt::skip] +macro_rules! 
atomic64 { + ($int_type:ident) => { + #[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_load { + ($asm:ident, $acquire:expr) => { + $asm!( + options(nostack, preserves_flags), + // (atomic) load from src to tmp pair + "ldrexd r2, r3, [{src}]", + clrex!(), + $acquire, // acquire fence + // store tmp pair to out + "strd r2, r3, [{out}]", + src = in(reg) src, + out = in(reg) out_ptr, + // tmp pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!(asm_no_dmb, ""), + // Acquire and SeqCst loads are equivalent. + Ordering::Acquire | Ordering::SeqCst => atomic_load!(asm_use_dmb, dmb!()), + _ => unreachable!("{:?}", order), + } + } + out + } + } + #[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_store { + ($asm:ident, $acquire:expr, $release:expr) => { + $asm!( + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), + // load from val to val pair + "ldrd r2, r3, [{val}]", + // (atomic) store val pair to dst (LL/SC loop) + $release, // release fence + "2:", + // load from dst to tmp pair + "ldrexd r4, r5, [{dst}]", + // try to store val pair to dst + "strexd {r}, r2, r3, [{dst}]", + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, 0x0", + "bne 2b", + $acquire, // acquire fence + dst = inout(reg) dst => _, + val = in(reg) val, + r = lateout(reg) _, + // val pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // tmp pair - must be even-numbered and not R14 + out("r4") _, + out("r5") _, + ) + }; + } + match order { + Ordering::Relaxed => atomic_store!(asm_no_dmb, "", ""), + Ordering::Release => atomic_store!(asm_use_dmb, "", dmb!()), + Ordering::SeqCst => atomic_store!(asm_use_dmb, dmb!(), dmb!()), + _ => unreachable!("{:?}", order), + } + } + } + } + #[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_swap { + ($asm:ident, $acquire:expr, $release:expr) => { + $asm!( + // Do not use `preserves_flags` because CMP modifies the condition flags. 
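+ // LL/SC loop: LDREXD reads the current 64-bit value into r4/r5, STREXD tries to store the new value
+ // from r2/r3, and the loop retries while {r} != 0. The pairs are pinned to even-numbered registers
+ // because LDREXD/STREXD require an even-numbered pair.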
+ options(nostack), + // load from val to val pair + "ldrd r2, r3, [{val}]", + // (atomic) swap (LL/SC loop) + $release, // release fence + "2:", + // load from dst to out pair + "ldrexd r4, r5, [{dst}]", + // try to store val pair to dst + "strexd {r}, r2, r3, [{dst}]", + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, 0x0", + "bne 2b", + $acquire, // acquire fence + // store out pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + val = in(reg) val, + out = inout(reg) out_ptr => _, + r = lateout(reg) _, + // val pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + out("r4") _, + out("r5") _, + ) + }; + } + match order { + Ordering::Relaxed => atomic_swap!(asm_no_dmb, "", ""), + Ordering::Acquire => atomic_swap!(asm_use_dmb, dmb!(), ""), + Ordering::Release => atomic_swap!(asm_use_dmb, "", dmb!()), + // AcqRel and SeqCst swaps are equivalent. + Ordering::AcqRel | Ordering::SeqCst => atomic_swap!(asm_use_dmb, dmb!(), dmb!()), + _ => unreachable!("{:?}", order), + } + } + out + } + } + #[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + use core::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst}; + let mut r: i32; + macro_rules! cmpxchg_store_relaxed { + ($asm:ident, $acquire_success:expr, $acquire_failure:expr) => { + $asm!( + // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. + options(nostack), + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + // (atomic) CAS (LL/SC loop) + "2:", + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", // jump if compare failed + "strexd {r}, r8, r9, [{dst}]", + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "bne 2b", // continue loop if store failed + $acquire_success, + "b 4f", + "3:", + // compare failed, set r to 1 and clear exclusive + "mov {r}, #1", + clrex!(), + $acquire_failure, + "4:", + // store out pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = in(reg) new, + out = inout(reg) out_ptr => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + out("r4") _, + out("r5") _, + // new pair - must be even-numbered and not R14 + out("r8") _, + out("r9") _, + ) + }; + } + macro_rules! cmpxchg_release { + ($acquire_failure:expr) => { + asm_use_dmb!( + // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. 
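+ // The 64-bit compare is two EORs plus ORRS: {r} becomes zero only if both halves of the loaded
+ // pair match the expected value, so a single BNE takes the failure path without a second CMP.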
+ options(nostack), + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + // (atomic) CAS (LL/SC loop) + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", // jump if compare failed + dmb!(), // release + "2:", + "strexd {r}, r8, r9, [{dst}]", + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 4f", // jump if store succeed + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "beq 2b", // continue loop if compare succeed + "3:", + // compare failed, set r to 1 and clear exclusive + "mov {r}, #1", + clrex!(), + $acquire_failure, + "4:", + // store out pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = in(reg) new, + out = inout(reg) out_ptr => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + out("r4") _, + out("r5") _, + // new pair - must be even-numbered and not R14 + out("r8") _, + out("r9") _, + ) + }; + } + macro_rules! cmpxchg_acqrel { + ($acquire_failure:expr) => { + asm_use_dmb!( + // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. + options(nostack), + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + // (atomic) CAS (LL/SC loop) + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", // jump if compare failed + dmb!(), // release + "2:", + "strexd {r}, r8, r9, [{dst}]", + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 4f", // jump if store succeed + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "beq 2b", // continue loop if compare succeed + "3:", + // compare failed, set r to 1 and clear exclusive + "mov {r}, #1", + clrex!(), + $acquire_failure, + "b 5f", + "4:", // store succeed + dmb!(), // acquire_success + "5:", + // store out pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = in(reg) new, + out = inout(reg) out_ptr => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + out("r4") _, + out("r5") _, + // new pair - must be even-numbered and not R14 + out("r8") _, + out("r9") _, + ) + }; + } + match (success, failure) { + (Relaxed, Relaxed) => cmpxchg_store_relaxed!(asm_no_dmb, "", ""), + (Relaxed, Acquire | SeqCst) => cmpxchg_store_relaxed!(asm_use_dmb, "", dmb!()), + (Acquire, Relaxed) => cmpxchg_store_relaxed!(asm_use_dmb, dmb!(), ""), + (Acquire, Acquire | SeqCst) => cmpxchg_store_relaxed!(asm_use_dmb, dmb!(), dmb!()), + (Release, Relaxed) => cmpxchg_release!(""), + (Release, Acquire | SeqCst) => cmpxchg_release!(dmb!()), + // AcqRel and SeqCst compare_exchange are equivalent. 
+ (AcqRel | SeqCst, Relaxed) => cmpxchg_acqrel!(""), + (AcqRel | SeqCst, _) => cmpxchg_acqrel!(dmb!()), + _ => unreachable!("{:?}, {:?}", success, failure), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + } + } + #[inline] + unsafe fn atomic_compare_exchange_weak( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + use core::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst}; + let mut r: i32; + macro_rules! cmpxchg_weak { + ($asm:ident, $acquire:expr, $release:expr) => { + $asm!( + // Do not use `preserves_flags` because ORRS modifies the condition flags. + options(nostack), + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", // jump if compare failed + $release, + "strexd {r}, r8, r9, [{dst}]", + "b 4f", + "3:", + // compare failed, set r to 1 and clear exclusive + "mov {r}, #1", + clrex!(), + "4:", + $acquire, + // store out pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = in(reg) new, + out = inout(reg) out_ptr => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + out("r4") _, + out("r5") _, + // new pair - must be even-numbered and not R14 + out("r8") _, + out("r9") _, + ) + }; + } + macro_rules! cmpxchg_weak_fail_load_relaxed { + ($release:expr) => { + asm_use_dmb!( + // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. + options(nostack), + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", // jump if compare failed + $release, + "strexd {r}, r8, r9, [{dst}]", + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 4f", // jump if store succeed + "b 5f", // jump (store failed) + "3:", + // compare failed, set r to 1 and clear exclusive + "mov {r}, #1", + clrex!(), + "b 5f", + "4:", // store succeed + dmb!(), // acquire_success + "5:", + // store out pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = in(reg) new, + out = inout(reg) out_ptr => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + out("r4") _, + out("r5") _, + // new pair - must be even-numbered and not R14 + out("r8") _, + out("r9") _, + ) + }; + } + macro_rules! cmpxchg_weak_success_load_relaxed { + ($release:expr) => { + asm_use_dmb!( + // Do not use `preserves_flags` because CMP and ORRS modify the condition flags. 
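+ // Mirror of cmpxchg_weak_fail_load_relaxed: the success ordering here has no acquire semantics,
+ // so the acquire dmb runs only on the failure path (label 4).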
+ options(nostack), + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", // jump if compare failed + $release, + "strexd {r}, r8, r9, [{dst}]", + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 5f", // jump if store succeed + "b 4f", // jump (store failed) + "3:", + // compare failed, set r to 1 and clear exclusive + "mov {r}, #1", + clrex!(), + "4:", // compare or store failed + dmb!(), // acquire_failure + "5:", + // store out pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = in(reg) new, + out = inout(reg) out_ptr => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + out("r4") _, + out("r5") _, + // new pair - must be even-numbered and not R14 + out("r8") _, + out("r9") _, + ) + }; + } + match (success, failure) { + (Relaxed, Relaxed) => cmpxchg_weak!(asm_no_dmb, "", ""), + (Relaxed, Acquire | SeqCst) => cmpxchg_weak_success_load_relaxed!(""), + (Acquire, Relaxed) => cmpxchg_weak_fail_load_relaxed!(""), + (Acquire, Acquire | SeqCst) => cmpxchg_weak!(asm_use_dmb, dmb!(), ""), + (Release, Relaxed) => cmpxchg_weak!(asm_use_dmb, "", dmb!()), + (Release, Acquire | SeqCst) => cmpxchg_weak_success_load_relaxed!(dmb!()), + // AcqRel and SeqCst compare_exchange_weak are equivalent. + (AcqRel | SeqCst, Relaxed) => cmpxchg_weak_fail_load_relaxed!(dmb!()), + (AcqRel | SeqCst, _) => cmpxchg_weak!(asm_use_dmb, dmb!(), dmb!()), + _ => unreachable!("{:?}, {:?}", success, failure), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + } + } + } + }; +} + +atomic64!(i64); +atomic64!(u64); + +#[macro_export] +macro_rules! cfg_has_atomic_8 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_8 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_16 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_16 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_32 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_32 { + ($($tt:tt)*) => {}; +} +#[cfg(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass"))] +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => {}; +} +#[cfg(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass"))] +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_128 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_no_atomic_128 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(any( + any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"), + not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")), +))] +#[macro_export] +macro_rules! 
cfg_has_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(any( + any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"), + not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")), +))] +#[macro_export] +macro_rules! cfg_no_atomic_cas { + ($($tt:tt)*) => {}; +} +#[cfg(not(any( + any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"), + not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")), +)))] +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => {}; +} +#[cfg(not(any( + any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"), + not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")), +)))] +#[macro_export] +macro_rules! cfg_no_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} diff --git a/src/arch_legacy/arm_linux.rs b/src/arch_legacy/arm_linux.rs new file mode 100644 index 00000000..d7edd73a --- /dev/null +++ b/src/arch_legacy/arm_linux.rs @@ -0,0 +1,670 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// Pre-v6 ARM Linux/Android +// +// Refs: +// - https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt +// - https://github.com/rust-lang/compiler-builtins/blob/0.1.88/src/arm_linux.rs +// - ARMv4 and ARMv5 Differences +// https://developer.arm.com/documentation/ddi0406/cb/Appendixes/ARMv4-and-ARMv5-Differences?lang=en +// +// Generated asm: +// - armv5te https://godbolt.org/z/r61s7cnG8 +// - armv4t https://godbolt.org/z/xrxfKx1rc + +#[path = "../arch/partword.rs"] +mod partword; + +use core::{ + arch::asm, + mem::{self, MaybeUninit}, + sync::atomic::Ordering, +}; + +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; + +type XSize = usize; + +// https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt +const KUSER_HELPER_VERSION: usize = 0xFFFF0FFC; +// __kuser_helper_version >= 2 (kernel version 2.6.12+) +const KUSER_CMPXCHG: usize = 0xFFFF0FC0; +// __kuser_helper_version >= 3 (kernel version 2.6.15+) +const KUSER_MEMORY_BARRIER: usize = 0xFFFF0FA0; +// __kuser_helper_version >= 5 (kernel version 3.1+) +const KUSER_CMPXCHG64: usize = 0xFFFF0F60; + +#[inline] +fn kuser_helper_version() -> i32 { + // SAFETY: core assumes that at least __kuser_memory_barrier (__kuser_helper_version >= 3) is + // available on this platform. __kuser_helper_version is always available on such a platform. + unsafe { (KUSER_HELPER_VERSION as *const i32).read() } +} + +#[cfg(any(target_feature = "v5te", atomic_maybe_uninit_target_feature = "v5te"))] +macro_rules! blx { + ($addr:tt) => { + concat!("blx ", $addr) + }; +} +#[cfg(not(any(target_feature = "v5te", atomic_maybe_uninit_target_feature = "v5te")))] +macro_rules! blx { + ($addr:tt) => { + concat!("mov lr, pc", "\n", "bx ", $addr) + }; +} + +macro_rules! atomic_load_store { + ($int_type:ident, $asm_suffix:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. 
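+ // Relaxed relies on an aligned LDR/LDRB/LDRH being a single-copy atomic load; Acquire/SeqCst
+ // additionally call __kuser_memory_barrier (via blx!) after the load as the acquire fence.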
+ unsafe { + match order { + Ordering::Relaxed => { + asm!( + // (atomic) load from src to tmp + concat!("ldr", $asm_suffix, " {tmp}, [{src}]"), + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + src = in(reg) src, + out = inout(reg) out_ptr => _, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ); + } + // Acquire and SeqCst loads are equivalent. + Ordering::Acquire | Ordering::SeqCst => { + debug_assert!(kuser_helper_version() >= 3); + asm!( + // (atomic) load from src to tmp + concat!("ldr", $asm_suffix, " {tmp}, [{src}]"), + blx!("{kuser_memory_barrier}"), // acquire fence + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + src = in(reg) src, + out = inout(reg) out_ptr => _, + tmp = lateout(reg) _, + kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _, + out("lr") _, + options(nostack, preserves_flags), + ); + } + _ => unreachable!("{:?}", order), + } + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_store_release { + ($acquire:expr) => {{ + debug_assert!(kuser_helper_version() >= 3); + asm!( + // load from val to tmp + concat!("ldr", $asm_suffix, " {tmp}, [{val}]"), + // (atomic) store tmp to dst + blx!("{kuser_memory_barrier}"), // release fence + concat!("str", $asm_suffix, " {tmp}, [{dst}]"), + $acquire, // acquire fence + dst = inout(reg) dst => _, + val = in(reg) val, + tmp = lateout(reg) _, + kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _, + out("lr") _, + options(nostack, preserves_flags), + ) + }}; + } + match order { + Ordering::Relaxed => { + asm!( + // load from val to tmp + concat!("ldr", $asm_suffix, " {tmp}, [{val}]"), + // (atomic) store tmp to dst + concat!("str", $asm_suffix, " {tmp}, [{dst}]"), + dst = inout(reg) dst => _, + val = in(reg) val, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ); + } + Ordering::Release => atomic_store_release!(""), + Ordering::SeqCst => atomic_store_release!(blx!("{kuser_memory_barrier}")), + _ => unreachable!("{:?}", order), + } + } + } + } + }; +} + +macro_rules! atomic { + ($int_type:ident) => { + atomic_load_store!($int_type, ""); + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(kuser_helper_version() >= 2); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + asm!( + "ldr r1, [r1]", // new_val + "2:", + "ldr r0, [r2]", // old_val + "mov {out_tmp}, r0", + blx!("{kuser_cmpxchg}"), + "cmp r0, #0", + "bne 2b", + "str {out_tmp}, [{out}]", + out = in(reg) out_ptr, + out_tmp = out(reg) _, + kuser_cmpxchg = in(reg) KUSER_CMPXCHG, + out("r0") _, + inout("r1") val => _, + in("r2") dst, // ptr + out("r3") _, + out("ip") _, + out("lr") _, + // Do not use `preserves_flags` because CMP and __kuser_cmpxchg modify the condition flags. 
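+ // __kuser_cmpxchg ABI (kernel_user_helpers.txt): r0 = expected old value, r1 = new value,
+ // r2 = pointer; on return r0 is 0 only if the exchange happened. The loop above re-reads *ptr
+ // and retries until the helper succeeds; per that document the helper also provides the needed
+ // memory barriers, which is why the requested Ordering needs no extra fences here.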
+ options(nostack), + ); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(kuser_helper_version() >= 2); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i32; + asm!( + "ldr {old}, [{old}]", + "ldr {new}, [{new}]", + "2:", + "ldr r0, [r2]", // old_val + "mov {out_tmp}, r0", + "cmp r0, {old}", + "bne 3f", + "mov r1, {new}", // new_val + blx!("{kuser_cmpxchg}"), + "cmp r0, #0", + "bne 2b", + "b 4f", + "3:", + // write back to synchronize + "mov r1, r0", // new_val + blx!("{kuser_cmpxchg}"), + "cmp r0, #0", + "bne 2b", + "mov r0, #1", + "4:", + "str {out_tmp}, [{out}]", + old = inout(reg) old => _, + new = inout(reg) new => _, + out = in(reg) out_ptr, + out_tmp = out(reg) _, + kuser_cmpxchg = in(reg) KUSER_CMPXCHG, + out("r0") r, + out("r1") _, + in("r2") dst, // ptr + out("r3") _, + out("ip") _, + out("lr") _, + // Do not use `preserves_flags` because CMP and __kuser_cmpxchg modify the condition flags. + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + } + } + } + }; +} + +macro_rules! atomic_sub_word { + ($int_type:ident, $asm_suffix:tt) => { + atomic_load_store!($int_type, $asm_suffix); + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(kuser_helper_version() >= 2); + let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + asm!( + concat!("ldr", $asm_suffix, " {val}, [{val}]"), + "lsl {mask}, {mask}, {shift}", + "lsl {val}, {val}, {shift}", + "and {val}, {val}, {mask}", + "mvn {inv_mask}, {mask}", + "2:", + "ldr r0, [r2]", // old_val + "mov {out_tmp}, r0", + "and r1, r0, {inv_mask}", + "orr r1, r1, {val}", // new_val + blx!("{kuser_cmpxchg}"), + "cmp r0, #0", + "bne 2b", + "lsr {out_tmp}, {out_tmp}, {shift}", + concat!("str", $asm_suffix, " {out_tmp}, [{out}]"), + val = inout(reg) val => _, + out = in(reg) out_ptr, + shift = in(reg) shift, + mask = inout(reg) mask => _, + inv_mask = out(reg) _, + out_tmp = out(reg) _, + kuser_cmpxchg = in(reg) KUSER_CMPXCHG, + out("r0") _, + out("r1") _, + in("r2") aligned_ptr, // ptr + out("r3") _, + out("ip") _, + out("lr") _, + // Do not use `preserves_flags` because CMP and __kuser_cmpxchg modify the condition flags. 
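+ // Sub-word swap: the mask/shift from partword::create_mask_values select the target byte/halfword
+ // within its aligned 32-bit container word (e.g. presumably shift = 24 and mask = 0xFF for a byte
+ // at offset 3 on a little-endian target), and __kuser_cmpxchg is applied to that whole word,
+ // replacing only the masked bits and preserving the rest via inv_mask.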
+ options(nostack), + ); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(kuser_helper_version() >= 2); + let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i32; + asm!( + concat!("ldr", $asm_suffix, " {old}, [{old}]"), + concat!("ldr", $asm_suffix, " {new}, [{new}]"), + "lsl {mask}, {mask}, {shift}", + "lsl {old}, {old}, {shift}", + "lsl {new}, {new}, {shift}", + "and {old}, {old}, {mask}", + "and {new}, {new}, {mask}", + // We cannot create inv_mask here because there are no available registers + // "mvn {inv_mask}, {mask}", + "2:", + "ldr r0, [r2]", // old_val + "and {out_tmp}, r0, {mask}", + "cmp {out_tmp}, {old}", + "bne 3f", + "mvn r1, {mask}", + "and r1, r0, r1", + "orr r1, r1, {new}", // new_val + blx!("{kuser_cmpxchg}"), + "cmp r0, #0", + "bne 2b", + "b 4f", + "3:", + // write back to synchronize + "mov r1, r0", // new_val + blx!("{kuser_cmpxchg}"), + "cmp r0, #0", + "bne 2b", + "mov r0, #1", + "4:", + "lsr {out_tmp}, {out_tmp}, {shift}", + concat!("str", $asm_suffix, " {out_tmp}, [{out}]"), + old = inout(reg) old => _, + new = inout(reg) new => _, + out = in(reg) out_ptr, + shift = in(reg) shift, + mask = inout(reg) mask => _, + out_tmp = out(reg) _, + kuser_cmpxchg = in(reg) KUSER_CMPXCHG, + out("r0") r, + out("r1") _, + in("r2") aligned_ptr, // ptr + out("r3") _, + out("ip") _, + out("lr") _, + // Do not use `preserves_flags` because CMP and __kuser_cmpxchg modify the condition flags. + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + } + } + } + }; +} + +atomic_sub_word!(i8, "b"); +atomic_sub_word!(u8, "b"); +atomic_sub_word!(i16, "h"); +atomic_sub_word!(u16, "h"); +atomic!(i32); +atomic!(u32); +atomic!(isize); +atomic!(usize); + +macro_rules! atomic64 { + ($int_type:ident) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + assert_has_kuser_cmpxchg64(); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + asm!( + "2:", + "ldr r0, [r2]", + "ldr r3, [r2, #4]", + "str r0, [r1]", + "str r3, [r1, #4]", + "mov r0, r1", // old_val + blx!("{kuser_cmpxchg64}"), + "cmp r0, #0", + "bne 2b", + kuser_cmpxchg64 = in(reg) KUSER_CMPXCHG64, + out("r0") _, + in("r1") out_ptr, // new_val + in("r2") src, // ptr + out("r3") _, + out("lr") _, + // Do not use `preserves_flags` because CMP and __kuser_cmpxchg64 modify the condition flags. + options(nostack), + ); + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + assert_has_kuser_cmpxchg64(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. 
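+ // There is no plain 64-bit atomic store here, so the store is an unconditional __kuser_cmpxchg64
+ // loop: read the current value into out_tmp, use it as old_val with `val` as new_val, and retry
+ // until the helper reports success.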
+ unsafe { + let mut out_tmp = MaybeUninit::::uninit(); + asm!( + "2:", + "ldr r0, [r2]", + "ldr r3, [r2, #4]", + "str r0, [{out_tmp}]", + "str r3, [{out_tmp}, #4]", + "mov r0, {out_tmp}", // old_val + blx!("{kuser_cmpxchg64}"), + "cmp r0, #0", + "bne 2b", + out_tmp = in(reg) out_tmp.as_mut_ptr(), + kuser_cmpxchg64 = in(reg) KUSER_CMPXCHG64, + out("r0") _, + in("r1") val, // new_val + in("r2") dst, // ptr + out("r3") _, + out("lr") _, + // Do not use `preserves_flags` because CMP and __kuser_cmpxchg64 modify the condition flags. + options(nostack), + ); + } + } + } + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + assert_has_kuser_cmpxchg64(); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + asm!( + "2:", + "ldr r0, [r2]", + "ldr r3, [r2, #4]", + "str r0, [{out_tmp}]", + "str r3, [{out_tmp}, #4]", + "mov r0, {out_tmp}", // old_val + blx!("{kuser_cmpxchg64}"), + "cmp r0, #0", + "bne 2b", + out_tmp = in(reg) out_ptr, + kuser_cmpxchg64 = in(reg) KUSER_CMPXCHG64, + out("r0") _, + in("r1") val, // new_val + in("r2") dst, // ptr + out("r3") _, + out("lr") _, + // Do not use `preserves_flags` because CMP and __kuser_cmpxchg64 modify the condition flags. + options(nostack), + ); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + assert_has_kuser_cmpxchg64(); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i32; + asm!( + "ldr {old_lo}, [{old_hi}]", + "ldr {old_hi}, [{old_hi}, #4]", + "2:", + "ldr r0, [r2]", + "ldr r3, [r2, #4]", + "str r0, [{out_tmp}]", + "str r3, [{out_tmp}, #4]", + "eor r0, r0, {old_lo}", + "eor r3, r3, {old_hi}", + "orrs r0, r0, r3", + "bne 3f", + "mov r0, {out_tmp}", // old_val + "mov r1, {new}", // new_val + blx!("{kuser_cmpxchg64}"), + "cmp r0, #0", + "bne 2b", + "b 4f", + "3:", + // write back to ensure atomicity + "mov r0, {out_tmp}", // old_val + "mov r1, {out_tmp}", // new_val + blx!("{kuser_cmpxchg64}"), + "cmp r0, #0", + "bne 2b", + "mov r0, #1", + "4:", + new = in(reg) new, + out_tmp = in(reg) out_ptr, + old_lo = out(reg) _, + old_hi = inout(reg) old => _, + kuser_cmpxchg64 = in(reg) KUSER_CMPXCHG64, + out("r0") r, + out("r1") _, + in("r2") dst, // ptr + out("r3") _, + out("lr") _, + // Do not use `preserves_flags` because CMP, ORRS, and __kuser_cmpxchg64 modify the condition flags. + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + } + } + } + }; +} + +atomic64!(i64); +atomic64!(u64); + +// TODO: Since Rust 1.64, the Linux kernel requirement for Rust when using std is 3.2+, so it +// should be possible to convert this to debug_assert if the std feature is enabled on Rust 1.64+. 
+// https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html +#[inline] +fn assert_has_kuser_cmpxchg64() { + if kuser_helper_version() < 5 { + #[cold] + fn p() -> ! { + panic!("64-bit atomics on pre-v6 ARM requires Linux kernel version 3.1+") + } + p() + } +} + +#[macro_export] +macro_rules! cfg_has_atomic_8 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_8 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_16 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_16 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_32 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_32 { + ($($tt:tt)*) => {}; +} +// TODO: set has_atomic_64 to true +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_has_atomic_128 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_no_atomic_128 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_cas { + ($($tt:tt)*) => {}; +} + +#[cfg(test)] +mod tests { + #[test] + fn kuser_helper_version() { + let version = super::kuser_helper_version(); + assert!(version >= 5, "{:?}", version); + } + + // TODO: set has_atomic_64 to true + test_atomic!(i64); + test_atomic!(u64); + stress_test!(u64); +} diff --git a/src/arch_legacy/armv8.rs b/src/arch_legacy/armv8.rs new file mode 100644 index 00000000..c96f8bde --- /dev/null +++ b/src/arch_legacy/armv8.rs @@ -0,0 +1,641 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// ARMv8 AArch32 +// +// LLVM doesn't generate CLREX for ARMv8-M Baseline, but it actually supports CLREX. +// https://developer.arm.com/documentation/dui1095/a/The-Cortex-M23-Instruction-Set/Memory-access-instructions?lang=en +// https://community.arm.com/cfs-file/__key/telligent-evolution-components-attachments/01-2057-00-00-00-01-28-35/Cortex_2D00_M-for-Beginners-_2D00_-2017_5F00_EN_5F00_v2.pdf +// +// Refs: +// - Arm Architecture Reference Manual for A-profile architecture +// https://developer.arm.com/documentation/ddi0487/latest +// - Armv8-M Architecture Reference Manual +// https://developer.arm.com/documentation/ddi0553/latest +// +// Generated asm: +// - armv8-a https://godbolt.org/z/Mx8z81463 +// - armv8-m baseline https://godbolt.org/z/P51ezojjW +// - armv8-m mainline https://godbolt.org/z/WdajnbYTr + +use core::{ + arch::asm, + mem::{self, MaybeUninit}, + sync::atomic::Ordering, +}; + +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; + +macro_rules! atomic_rmw { + ($op:ident, $order:ident) => { + match $order { + Ordering::Relaxed => $op!("r", "r"), + Ordering::Acquire => $op!("a", "r"), + Ordering::Release => $op!("r", "l"), + // AcqRel and SeqCst RMWs are equivalent. + Ordering::AcqRel | Ordering::SeqCst => $op!("a", "l"), + _ => unreachable!("{:?}", $order), + } + }; +} + +// Adds S suffix if needed. We prefer instruction without S suffix, +// but ARMv8-M Baseline doesn't support thumb2 instructions. +#[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] +macro_rules! s { + ($op:tt, $operand:tt) => { + concat!($op, " ", $operand) + }; +} +#[cfg(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass"))] +macro_rules! 
s { + ($op:tt, $operand:tt) => { + concat!($op, "s ", $operand) + }; +} + +macro_rules! atomic { + ($int_type:ident, $asm_suffix:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_load { + ($acquire:tt) => { + asm!( + // (atomic) load from src to tmp + concat!("ld", $acquire, $asm_suffix, " {tmp}, [{src}]"), + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + src = in(reg) src, + out = inout(reg) out_ptr => _, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!("r"), + // Acquire and SeqCst loads are equivalent. + Ordering::Acquire | Ordering::SeqCst => atomic_load!("a"), + _ => unreachable!("{:?}", order), + } + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_store { + ($release:tt) => { + asm!( + // load from val to tmp + concat!("ldr", $asm_suffix, " {tmp}, [{val}]"), + // (atomic) store tmp to dst + concat!("st", $release, $asm_suffix, " {tmp}, [{dst}]"), + dst = inout(reg) dst => _, + val = in(reg) val, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_store!("r"), + // Release and SeqCst stores are equivalent. + Ordering::Release | Ordering::SeqCst => atomic_store!("l"), + _ => unreachable!("{:?}", order), + } + } + } + } + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! swap { + ($acquire:tt, $release:tt) => { + asm!( + // load from val (ptr) to val (val) + concat!("ldr", $asm_suffix, " {val}, [{val}]"), + // (atomic) swap (LL/SC loop) + "2:", + // load from dst to tmp + concat!("ld", $acquire, "ex", $asm_suffix, " {tmp}, [{dst}]"), + // try to store val to dst + concat!("st", $release, "ex", $asm_suffix, " {r}, {val}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, 0x0", + "bne 2b", + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + dst = in(reg) dst, + val = inout(reg) val => _, + out = in(reg) out_ptr, + r = out(reg) _, + tmp = out(reg) _, + // Do not use `preserves_flags` because CMP modifies the condition flags. 
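+ // On ARMv8 AArch32 the memory ordering is encoded in the exclusives themselves: atomic_rmw!
+ // selects "a"/"l" so this expands to LDAEX*/STLEX* as needed, and no separate dmb fences are
+ // required (unlike the ARMv6/v7 code in arm.rs).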
+ options(nostack), + ) + }; + } + atomic_rmw!(swap, order); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i32; + macro_rules! cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + // load from old/new (ptr) to old/new (val) + concat!("ldr", $asm_suffix, " {old}, [{old}]"), + concat!("ldr", $asm_suffix, " {new}, [{new}]"), + // (atomic) CAS (LL/SC loop) + "2:", + // load from dst to tmp + concat!("ld", $acquire, "ex", $asm_suffix, " {tmp}, [{dst}]"), + "cmp {tmp}, {old}", + "bne 3f", // jump if compare failed + // try to store val to dst + concat!("st", $release, "ex", $asm_suffix, " {r}, {new}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "bne 2b", // continue loop if store failed + "b 4f", + "3:", + // compare failed, mark r as failed and clear exclusive + "clrex", + s!("mov", "{r}, #1"), + "4:", + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + dst = in(reg) dst, + old = inout(reg) old => _, + new = inout(reg) new => _, + out = in(reg) out_ptr, + r = out(reg) r, + tmp = out(reg) _, + // Do not use `preserves_flags` because CMP and s! modify the condition flags. + options(nostack), + ) + }; + } + atomic_rmw!(cmpxchg, order); + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + } + } + #[inline] + unsafe fn atomic_compare_exchange_weak( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i32; + macro_rules! cmpxchg_weak { + ($acquire:tt, $release:tt) => { + asm!( + // load from old/new (ptr) to old/new (val) + concat!("ldr", $asm_suffix, " {old}, [{old}]"), + concat!("ldr", $asm_suffix, " {new}, [{new}]"), + // load from dst to tmp + concat!("ld", $acquire, "ex", $asm_suffix, " {tmp}, [{dst}]"), + "cmp {tmp}, {old}", + "bne 3f", + // try to store new to dst + concat!("st", $release, "ex", $asm_suffix, " {r}, {new}, [{dst}]"), + "b 4f", + "3:", + // compare failed, mark r as failed and clear exclusive + "clrex", + s!("mov", "{r}, #1"), + "4:", + // store tmp to out + concat!("str", $asm_suffix, " {tmp}, [{out}]"), + dst = in(reg) dst, + old = inout(reg) old => _, + new = inout(reg) new => _, + out = in(reg) out_ptr, + r = out(reg) r, + tmp = out(reg) _, + // Do not use `preserves_flags` because CMP and s! modify the condition flags. 
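+ // Weak variant: no retry loop, so a spurious STREX failure is simply reported as a failed
+ // exchange (allowed for compare_exchange_weak); {r} carries the non-zero STREX status (or the
+ // 1 set on compare failure) straight to the caller.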
+ options(nostack), + ) + }; + } + atomic_rmw!(cmpxchg_weak, order); + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + } + } + } + }; +} + +atomic!(i8, "b"); +atomic!(u8, "b"); +atomic!(i16, "h"); +atomic!(u16, "h"); +atomic!(i32, ""); +atomic!(u32, ""); +atomic!(isize, ""); +atomic!(usize, ""); + +#[rustfmt::skip] +macro_rules! atomic64 { + ($int_type:ident) => { + #[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_load { + ($acquire:tt) => { + asm!( + // (atomic) load from src to tmp pair + concat!("ld", $acquire, "exd r2, r3, [{src}]"), + "clrex", + // store tmp pair to out + "strd r2, r3, [{out}]", + src = in(reg) src, + out = in(reg) out_ptr, + // tmp pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!("r"), + // Acquire and SeqCst loads are equivalent. + Ordering::Acquire | Ordering::SeqCst => atomic_load!("a"), + _ => unreachable!("{:?}", order), + } + } + out + } + } + #[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! store { + ($acquire:tt, $release:tt) => { + asm!( + // load from val to val pair + "ldrd r2, r3, [{val}]", + // (atomic) store val pair to dst (LL/SC loop) + "2:", + // load from dst to tmp pair + concat!("ld", $acquire, "exd r4, r5, [{dst}]"), + // try to store val pair to dst + concat!("st", $release, "exd {r}, r2, r3, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, 0x0", + "bne 2b", + dst = inout(reg) dst => _, + val = in(reg) val, + r = lateout(reg) _, + // val pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // tmp pair - must be even-numbered and not R14 + out("r4") _, + out("r5") _, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), + ) + }; + } + atomic_rmw!(store, order); + } + } + } + #[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! 
swap { + ($acquire:tt, $release:tt) => { + asm!( + // load from val to val pair + "ldrd r2, r3, [{val}]", + // (atomic) swap (LL/SC loop) + "2:", + // load from dst to out pair + concat!("ld", $acquire, "exd r4, r5, [{dst}]"), + // try to store val pair to dst + concat!("st", $release, "exd {r}, r2, r3, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, 0x0", + "bne 2b", + // store out pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + val = in(reg) val, + out = inout(reg) out_ptr => _, + r = lateout(reg) _, + // val pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + out("r4") _, + out("r5") _, + // Do not use `preserves_flags` because CMP modifies the condition flags. + options(nostack), + ) + }; + } + atomic_rmw!(swap, order); + } + out + } + } + #[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i32; + macro_rules! cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + // (atomic) CAS (LL/SC loop) + "2:", + concat!("ld", $acquire, "exd r4, r5, [{dst}]"), + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", // jump if compare failed + concat!("st", $release, "exd {r}, r8, r9, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "bne 2b", // continue loop if store failed + "b 4f", + "3:", + // compare failed, mark r as failed and clear exclusive + "clrex", + s!("mov", "{r}, #1"), + "4:", + // store out pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = in(reg) new, + out = inout(reg) out_ptr => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + out("r4") _, + out("r5") _, + // new pair - must be even-numbered and not R14 + out("r8") _, + out("r9") _, + // Do not use `preserves_flags` because CMP, ORRS, and s! modify the condition flags. + options(nostack), + ) + }; + } + atomic_rmw!(cmpxchg, order); + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + } + } + #[inline] + unsafe fn atomic_compare_exchange_weak( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i32; + macro_rules! 
cmpxchg_weak { + ($acquire:tt, $release:tt) => { + asm!( + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + concat!("ld", $acquire, "exd r4, r5, [{dst}]"), + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", // jump if compare failed + concat!("st", $release, "exd {r}, r8, r9, [{dst}]"), + "b 4f", + "3:", + // compare failed, mark r as failed and clear exclusive + "clrex", + s!("mov", "{r}, #1"), + "4:", + // store out pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = in(reg) new, + out = inout(reg) out_ptr => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + out("r4") _, + out("r5") _, + // new pair - must be even-numbered and not R14 + out("r8") _, + out("r9") _, + // Do not use `preserves_flags` because ORRS and s! modify the condition flags. + options(nostack), + ) + }; + } + atomic_rmw!(cmpxchg_weak, order); + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + (out, r == 0) + } + } + } + }; +} + +atomic64!(i64); +atomic64!(u64); + +#[macro_export] +macro_rules! cfg_has_atomic_8 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_8 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_16 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_16 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_32 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_32 { + ($($tt:tt)*) => {}; +} +#[cfg(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass"))] +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => {}; +} +#[cfg(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass"))] +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_128 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_no_atomic_128 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_cas { + ($($tt:tt)*) => {}; +} diff --git a/src/arch_legacy/hexagon.rs b/src/arch_legacy/hexagon.rs new file mode 100644 index 00000000..e0557b0f --- /dev/null +++ b/src/arch_legacy/hexagon.rs @@ -0,0 +1,459 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// Hexagon +// +// Refs: +// - Hexagon V5x Programmer's Reference Manual +// https://developer.qualcomm.com/download/hexagon/hexagon-v5x-programmers-reference-manual.pdf?referrer=node/6116 + +#[path = "../arch/partword.rs"] +mod partword; + +use core::{arch::asm, mem::MaybeUninit, sync::atomic::Ordering}; + +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; + +type XSize = usize; + +macro_rules! 
atomic_load_store { + ($int_type:ident, $asm_suffix:tt, $asm_u_suffix:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + asm!( + // (atomic) load from src to tmp + concat!("{tmp} = mem", $asm_u_suffix, $asm_suffix, "({src})"), + // store tmp to out + concat!("mem", $asm_suffix, "({out}) = {tmp}"), + src = in(reg) src, + out = inout(reg) out_ptr => _, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ); + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) { + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + asm!( + // load from val to tmp + concat!("{tmp} = mem", $asm_u_suffix, $asm_suffix, "({val})"), + // (atomic) store tmp to dst + concat!("mem", $asm_suffix, "({dst}) = {tmp}"), + dst = inout(reg) dst => _, + val = in(reg) val, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ); + } + } + } + }; +} + +macro_rules! atomic { + ($int_type:ident) => { + atomic_load_store!($int_type, "w", ""); + impl AtomicSwap for $int_type { + #[inline(never)] // TODO: there is no way to mark p0 as clobbered + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + asm!( + "{val} = memw({val})", + "2:", + "{tmp} = memw_locked({dst})", + "memw_locked({dst},p0) = {val}", + "if (!p0) jump 2b", + "memw({out}) = {tmp}", + dst = in(reg) dst, + val = inout(reg) val => _, + out = in(reg) out_ptr, + tmp = out(reg) _, + options(nostack), + ); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline(never)] // TODO: there is no way to mark p0 as clobbered + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i32 = 1; + asm!( + "{old} = memw({old})", + "{new} = memw({new})", + "2:", + "{tmp} = memw_locked({dst})", + "{{ p0 = cmp.eq({tmp},{old})", + "if (!p0.new) jump:nt 3f }}", + "memw_locked({dst},p0) = {new}", + "if (!p0) jump 2b", + "jump 4f", + "3:", + "{r} = #0", + "4:", + "memw({out}) = {tmp}", + dst = in(reg) dst, + old = inout(reg) old => _, + new = inout(reg) new => _, + out = in(reg) out_ptr, + tmp = out(reg) _, + r = inout(reg) r, + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + (out, r != 0) + } + } + } + }; +} + +macro_rules! 
atomic_sub_word { + ($int_type:ident, $asm_suffix:tt, $asm_u_suffix:tt) => { + atomic_load_store!($int_type, $asm_suffix, $asm_u_suffix); + impl AtomicSwap for $int_type { + #[inline(never)] // TODO: there is no way to mark p0 as clobbered + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + asm!( + concat!("{val} = mem", $asm_u_suffix, $asm_suffix, "({val})"), + "{mask} = asl({mask},{shift})", + "{val} = asl({val},{shift})", + "{val} = and({val},{mask})", + "{inv_mask} = not({mask})", + "2:", + "{out_tmp} = memw_locked({dst})", + "{tmp} = and({out_tmp},{inv_mask})", + "{tmp} = or({tmp},{val})", + "memw_locked({dst},p0) = {tmp}", + "if (!p0) jump 2b", + "{out_tmp} = asr({out_tmp},{shift})", + concat!("mem", $asm_suffix, "({out}) = {out_tmp}"), + dst = in(reg) aligned_ptr, + val = inout(reg) val => _, + out = in(reg) out_ptr, + shift = in(reg) shift, + mask = inout(reg) mask => _, + inv_mask = out(reg) _, + out_tmp = out(reg) _, + tmp = out(reg) _, + options(nostack), + ); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline(never)] // TODO: there is no way to mark p0 as clobbered + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i32 = 1; + asm!( + concat!("{old} = mem", $asm_u_suffix, $asm_suffix, "({old})"), + concat!("{new} = mem", $asm_u_suffix, $asm_suffix, "({new})"), + "{mask} = asl({mask},{shift})", + "{old} = asl({old},{shift})", + "{new} = asl({new},{shift})", + "{old} = and({old},{mask})", + "{new} = and({new},{mask})", + "{inv_mask} = not({mask})", + "2:", + "{tmp} = memw_locked({dst})", + "{out_tmp} = and({tmp},{mask})", + "{{ p0 = cmp.eq({out_tmp},{old})", + "if (!p0.new) jump:nt 3f }}", + "{tmp} = and({tmp},{inv_mask})", + "{tmp} = or({tmp},{new})", + "memw_locked({dst},p0) = {tmp}", + "if (!p0) jump 2b", + "jump 4f", + "3:", + "{r} = #0", + "4:", + "{out_tmp} = asr({out_tmp},{shift})", + concat!("mem", $asm_suffix, "({out}) = {out_tmp}"), + dst = in(reg) aligned_ptr, + old = inout(reg) old => _, + new = inout(reg) new => _, + out = in(reg) out_ptr, + shift = in(reg) shift, + mask = inout(reg) mask => _, + inv_mask = out(reg) _, + out_tmp = out(reg) _, + tmp = out(reg) _, + r = inout(reg) r, + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + (out, r != 0) + } + } + } + }; +} + +atomic_sub_word!(i8, "b", "u"); +atomic_sub_word!(u8, "b", "u"); +atomic_sub_word!(i16, "h", "u"); +atomic_sub_word!(u16, "h", "u"); +atomic!(i32); +atomic!(u32); +atomic!(isize); +atomic!(usize); + +macro_rules! atomic64 { + ($int_type:ident) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. 
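+                // Plain memd (rather than a memd_locked LL/SC pair) is used for the
+                // 64-bit load/store below: the implementation relies on naturally
+                // aligned 64-bit memd accesses being single-copy atomic on Hexagon,
+                // so the locked variants are only needed for swap/compare_exchange.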
+ unsafe { + asm!( + // (atomic) load from src to tmp pair + "{{ r3:2 = memd({src}) }}", + // store tmp pair to out + "memd({out}) = r3:2", + src = in(reg) src, + out = in(reg) out_ptr, + out("r2") _, // tmp + out("r3") _, // tmp + options(nostack, preserves_flags), + ); + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) { + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + asm!( + // load from val to tmp pair + "{{ r3:2 = memd({val}) }}", + // (atomic) store tmp pair to dst + "memd({dst}) = r3:2", + dst = in(reg) dst, + val = in(reg) val, + out("r2") _, // tmp + out("r3") _, // tmp + options(nostack, preserves_flags), + ); + } + } + } + impl AtomicSwap for $int_type { + #[inline(never)] // TODO: there is no way to mark p0 as clobbered + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + asm!( + "{{ r3:2 = memd({val}) }}", + "2:", + "{{ r5:4 = memd_locked({dst}) }}", + "memd_locked({dst},p0) = r3:2", + "if (!p0) jump 2b", + "memd({out}) = r5:4", + dst = in(reg) dst, + val = in(reg) val, + out = in(reg) out_ptr, + out("r2") _, // val + out("r3") _, // val + out("r4") _, // tmp + out("r5") _, // tmp + options(nostack), + ); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline(never)] // TODO: there is no way to mark p0 as clobbered + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i32 = 1; + asm!( + "{{ r3:2 = memd({old}) }}", + "{{ r5:4 = memd({new}) }}", + "2:", + "{{ r7:6 = memd_locked({dst}) }}", + // TODO: merge two cmp? + "{{ p0 = cmp.eq(r6,r2)", + "if (!p0.new) jump:nt 3f }}", + "{{ p0 = cmp.eq(r7,r3)", + "if (!p0.new) jump:nt 3f }}", + "memd_locked({dst},p0) = r5:4", + "if (!p0) jump 2b", + "jump 4f", + "3:", + "{r} = #0", + "4:", + "memd({out}) = r7:6", + dst = in(reg) dst, + old = in(reg) old, + new = in(reg) new, + out = in(reg) out_ptr, + r = inout(reg) r, + out("r2") _, // old + out("r3") _, // old + out("r4") _, // new + out("r5") _, // new + out("r6") _, // tmp + out("r7") _, // tmp + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + (out, r != 0) + } + } + } + }; +} + +atomic64!(i64); +atomic64!(u64); + +#[macro_export] +macro_rules! cfg_has_atomic_8 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_8 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_16 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_16 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_32 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_32 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! 
cfg_has_atomic_128 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_no_atomic_128 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_cas { + ($($tt:tt)*) => {}; +} diff --git a/src/arch_legacy/loongarch.rs b/src/arch_legacy/loongarch.rs new file mode 100644 index 00000000..7fee2316 --- /dev/null +++ b/src/arch_legacy/loongarch.rs @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// LoongArch +// +// Generated asm: +// - loongarch64 https://godbolt.org/z/vTxfajT14 + +#[path = "../arch/partword.rs"] +mod partword; + +use core::{ + arch::asm, + mem::{self, MaybeUninit}, + sync::atomic::Ordering, +}; + +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; + +#[cfg(target_arch = "loongarch64")] +type XSize = u64; + +#[rustfmt::skip] +macro_rules! atomic_load { + ($int_type:ident, $asm_suffix:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_load { + ($acquire:tt) => { + asm!( + // (atomic) load from src to tmp + concat!("ld.", $asm_suffix, " {tmp}, {src}, 0"), + $acquire, + // store tmp to out + concat!("st.", $asm_suffix, " {tmp}, {out}, 0"), + src = in(reg) ptr_reg!(src), + out = inout(reg) ptr_reg!(out_ptr) => _, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!(""), + // Acquire and SeqCst loads are equivalent. + Ordering::Acquire | Ordering::SeqCst => atomic_load!("dbar 0"), + _ => unreachable!("{:?}", order), + } + } + out + } + } + }; +} + +macro_rules! atomic { + ($int_type:ident, $asm_suffix:tt) => { + atomic_load!($int_type, $asm_suffix); + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + match order { + Ordering::Relaxed => { + asm!( + // load from val to tmp + concat!("ld.", $asm_suffix, " {tmp}, {val}, 0"), + // (atomic) store tmp to dst + concat!("st.", $asm_suffix, " {tmp}, {dst}, 0"), + dst = inout(reg) ptr_reg!(dst) => _, + val = in(reg) ptr_reg!(val), + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ); + } + Ordering::Release | Ordering::SeqCst => { + asm!( + // load from val to tmp + concat!("ld.", $asm_suffix, " {tmp}, {val}, 0"), + // (atomic) store tmp to dst + concat!("amswap_db.", $asm_suffix, " $zero, {tmp}, {dst}"), + dst = inout(reg) ptr_reg!(dst) => _, + val = in(reg) ptr_reg!(val), + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ) + } + _ => unreachable!("{:?}", order), + } + } + } + } + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { + asm!( + // load from val (ptr) to val (val) + concat!("ld.", $asm_suffix, " {val}, {val}, 0"), + // (atomic) swap (AMO) + // - load value from dst and store it to tmp + // - store value of val to dst + concat!("amswap_db.", $asm_suffix, " {tmp}, {val}, {dst}"), + // store tmp to out + concat!("st.", $asm_suffix, " {tmp}, {out}, 0"), + dst = in(reg) ptr_reg!(dst), + val = inout(reg) ptr_reg!(val) => _, + out = inout(reg) ptr_reg!(out_ptr) => _, + tmp = out(reg) _, + options(nostack, preserves_flags), + ) + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: XSize; + asm!( + // load from old/new (ptr) to old/new (val) + concat!("ld.", $asm_suffix, " {old}, {old}, 0"), + concat!("ld.", $asm_suffix, " {new}, {new}, 0"), + // (atomic) CAS (LL/SC loop) + "2:", + concat!("ll.", $asm_suffix, " {tmp}, {dst}, 0"), + "bne {tmp}, {old}, 3f", // compare and jump if compare failed + "dbar 0", + "move {r}, {new}", + concat!("sc.", $asm_suffix, " {r}, {dst}, 0"), + "beqz {r}, 2b", // continue loop if store failed + "b 4f", + "3:", + "dbar 1792", + "4:", + // store tmp to out + concat!("st.", $asm_suffix, " {tmp}, {out}, 0"), + "xor {r}, {tmp}, {old}", + "sltui {r}, {r}, 1", + dst = in(reg) ptr_reg!(dst), + old = inout(reg) ptr_reg!(old) => _, + new = inout(reg) ptr_reg!(new) => _, + out = in(reg) ptr_reg!(out_ptr), + tmp = out(reg) _, + r = out(reg) r, + options(nostack, preserves_flags), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + (out, r != 0) + } + } + } + }; +} + +macro_rules! atomic_sub_word { + ($int_type:ident, $asm_suffix:tt) => { + atomic_load!($int_type, $asm_suffix); + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_store { + ($acquire:tt, $release:tt) => { + asm!( + // load from val to tmp + concat!("ld.", $asm_suffix, " {tmp}, {val}, 0"), + // (atomic) store tmp to dst + $release, + concat!("st.", $asm_suffix, " {tmp}, {dst}, 0"), + $acquire, + dst = inout(reg) ptr_reg!(dst) => _, + val = in(reg) ptr_reg!(val), + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_store!("", ""), + Ordering::Release => atomic_store!("", "dbar 0"), + Ordering::SeqCst => atomic_store!("dbar 0", "dbar 0"), + _ => unreachable!("{:?}", order), + } + } + } + } + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. 
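+                // A rough sketch of what partword::create_mask_values is assumed to
+                // return (partword.rs has the real, endian-aware implementation):
+                //     aligned_ptr = dst rounded down to a 4-byte boundary
+                //     shift       = 8 * (byte offset of dst within that word)
+                //     mask        = (1 << (8 * size_of::<$int_type>())) - 1, still unshifted;
+                //                   the asm below shifts it into place ("sll.w {mask}, {mask}, {shift}").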
+ unsafe { + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. + // See also partword.rs. + macro_rules! atomic_swap { + ($fence:tt) => { + asm!( + "sll.w {mask}, {mask}, {shift}", + "addi.w {mask}, {mask}, 0", + concat!("ld.", $asm_suffix, "u {val}, {val}, 0"), + "sll.w {val}, {val}, {shift}", + "addi.w {val}, {val}, 0", + // (atomic) swap (LL/SC loop) + "2:", + $fence, + "ll.w {tmp1}, {dst}, 0", + "addi.w {tmp2}, {val}, 0", + "xor {tmp2}, {tmp1}, {tmp2}", + "and {tmp2}, {tmp2}, {mask}", + "xor {tmp2}, {tmp1}, {tmp2}", + "sc.w {tmp2}, {dst}, 0", + "beqz {tmp2}, 2b", + "srl.w {tmp1}, {tmp1}, {shift}", + concat!("st.", $asm_suffix, " {tmp1}, {out}, 0"), + dst = in(reg) ptr_reg!(aligned_ptr), + val = inout(reg) ptr_reg!(val) => _, + out = in(reg) ptr_reg!(out_ptr), + shift = in(reg) shift, + mask = inout(reg) mask => _, + tmp1 = out(reg) _, + tmp2 = out(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_swap!(""), + _ => atomic_swap!("dbar 0"), + } + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: XSize; + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. + // See also partword.rs. + asm!( + concat!("ld.", $asm_suffix, "u {new}, {new}, 0"), + concat!("ld.", $asm_suffix, "u {old}, {old}, 0"), + "sll.w {new}, {new}, {shift}", + "addi.w {new}, {new}, 0", + "sll.w {old}, {old}, {shift}", + "addi.w $a7, {old}, 0", + "sll.w {mask}, {mask}, {shift}", + "addi.w $a6, {mask}, 0", + // (atomic) CAS (LL/SC loop) + "2:", + "ll.w $t0, {dst}, 0", + "and $t1, $t0, $a6", + "bne $t1, $a7, 3f", + "dbar 0", + "andn $t1, $t0, $a6", + "or $t1, $t1, {new}", + "sc.w $t1, {dst}, 0", + "beqz $t1, 2b", + "b 4f", + "3:", + "dbar 1792", + "4:", + "srl.w $a6, $t0, {shift}", + concat!("st.", $asm_suffix, " $a6, {out}, 0"), + "and {r}, $t0, {mask}", + "addi.w {r}, {r}, 0", + "xor {r}, {old}, {r}", + "sltui {r}, {r}, 1", + dst = in(reg) ptr_reg!(aligned_ptr), + old = inout(reg) ptr_reg!(old) => _, + new = inout(reg) ptr_reg!(new) => _, + out = inout(reg) ptr_reg!(out_ptr) => _, + shift = in(reg) shift, + mask = inout(reg) mask => _, + r = lateout(reg) r, + out("$a6") _, + out("$a7") _, + out("$t0") _, + out("$t1") _, + options(nostack, preserves_flags), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + (out, r != 0) + } + } + } + }; +} + +atomic_sub_word!(i8, "b"); +atomic_sub_word!(u8, "b"); +atomic_sub_word!(i16, "h"); +atomic_sub_word!(u16, "h"); +atomic!(i32, "w"); +atomic!(u32, "w"); +#[cfg(target_arch = "loongarch64")] +atomic!(i64, "d"); +#[cfg(target_arch = "loongarch64")] +atomic!(u64, "d"); +#[cfg(target_pointer_width = "32")] +atomic!(isize, "w"); +#[cfg(target_pointer_width = "32")] +atomic!(usize, "w"); +#[cfg(target_pointer_width = "64")] +atomic!(isize, "d"); +#[cfg(target_pointer_width = "64")] +atomic!(usize, "d"); + +#[macro_export] +macro_rules! 
cfg_has_atomic_8 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_8 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_16 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_16 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_32 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_32 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_128 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_no_atomic_128 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_cas { + ($($tt:tt)*) => {}; +} diff --git a/src/arch_legacy/mips.rs b/src/arch_legacy/mips.rs new file mode 100644 index 00000000..4e70dde5 --- /dev/null +++ b/src/arch_legacy/mips.rs @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// MIPS32 and MIPS64 +// +// Generated asm: +// - mips https://godbolt.org/z/38oKcY5bj +// - mipsel https://godbolt.org/z/M18x694zh +// - mips64 https://godbolt.org/z/GMMda9rM8 +// - mips64el https://godbolt.org/z/31ovT3vzW + +#[path = "../arch/partword.rs"] +mod partword; + +use core::{ + arch::asm, + mem::{self, MaybeUninit}, + sync::atomic::Ordering, +}; + +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; + +// TODO: we can add options(preserves_flags) to some of asm! + +macro_rules! atomic_rmw { + ($op:ident, $order:ident) => { + match $order { + Ordering::Relaxed => $op!("", ""), + Ordering::Acquire => $op!("sync", ""), + Ordering::Release => $op!("", "sync"), + // AcqRel and SeqCst RMWs are equivalent. + Ordering::AcqRel | Ordering::SeqCst => $op!("sync", "sync"), + _ => unreachable!("{:?}", $order), + } + }; +} + +#[cfg(any(target_arch = "mips", target_arch = "mips32r6"))] +type XSize = u32; +#[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))] +type XSize = u64; + +#[rustfmt::skip] +macro_rules! atomic_load_store { + ($int_type:ident, $asm_suffix:tt, $l_u_suffix:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_load { + ($acquire:tt) => { + asm!( + ".set push", + ".set noat", + // (atomic) load from src to tmp + concat!("l", $asm_suffix, " {tmp}, 0({src})"), + $acquire, + // store tmp to out + concat!("s", $asm_suffix, " {tmp}, 0({out})"), + ".set pop", + src = in(reg) ptr_reg!(src), + out = in(reg) ptr_reg!(out_ptr), + tmp = out(reg) _, + options(nostack), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!(""), + // Acquire and SeqCst loads are equivalent. + Ordering::Acquire | Ordering::SeqCst => atomic_load!("sync"), + _ => unreachable!("{:?}", order), + } + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { + macro_rules! store { + ($acquire:tt, $release:tt) => { + asm!( + ".set push", + ".set noat", + // load from val to tmp + concat!("l", $asm_suffix, $l_u_suffix, " {tmp}, 0({val})"), + // (atomic) store tmp to dst + $release, // release fence + concat!("s", $asm_suffix, " {tmp}, 0({dst})"), + $acquire, // acquire fence + ".set pop", + dst = in(reg) ptr_reg!(dst), + val = in(reg) ptr_reg!(val), + tmp = out(reg) _, + options(nostack), + ) + }; + } + atomic_rmw!(store, order); + } + } + } + }; +} + +#[rustfmt::skip] +macro_rules! atomic { + ($int_type:ident, $asm_suffix:tt, $ll_sc_suffix:tt) => { + atomic_load_store!($int_type, $asm_suffix, ""); + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! swap { + ($acquire:tt, $release:tt) => { + asm!( + ".set push", + ".set noat", + // load from val to val_tmp + concat!("l", $asm_suffix, " {val_tmp}, 0({val})"), + // (atomic) swap (LL/SC loop) + $release, // release fence + "2:", + // load from dst to out_tmp + concat!("ll", $ll_sc_suffix, " {out_tmp}, 0({dst})"), + "move {r}, {val_tmp}", + // try to store val to dst + concat!("sc", $ll_sc_suffix, " {r}, 0({dst})"), + // 1 if the store was successful, 0 if no store was performed + "beqz {r}, 2b", + $acquire, // acquire fence + // store out_tmp to out + concat!("s", $asm_suffix, " {out_tmp}, 0({out})"), + ".set pop", + dst = inout(reg) ptr_reg!(dst) => _, + val = in(reg) ptr_reg!(val), + out = inout(reg) ptr_reg!(out_ptr) => _, + val_tmp = out(reg) _, + out_tmp = out(reg) _, + r = lateout(reg) _, + options(nostack), + ) + }; + } + atomic_rmw!(swap, order); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: XSize; + macro_rules! 
cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + ".set push", + ".set noat", + // load from old/new to old_tmp/new_tmp + concat!("l", $asm_suffix, " {old_tmp}, 0({old})"), + concat!("l", $asm_suffix, " {new_tmp}, 0({new})"), + // (atomic) CAS (LL/SC loop) + $release, // release fence + "2:", + // load from dst to out_tmp + concat!("ll", $ll_sc_suffix, " {out_tmp}, 0({dst})"), + "bne {out_tmp}, {old_tmp}, 3f", // compare and jump if compare failed + "move {r}, {new_tmp}", + // try to store new to dst + concat!("sc", $ll_sc_suffix, " {r}, 0({dst})"), + // 1 if the store was successful, 0 if no store was performed + "beqz {r}, 2b", // continue loop if store failed + "3:", + $acquire, // acquire fence + "xor {new_tmp}, {out_tmp}, {old_tmp}", + // store out_tmp to out + concat!("s", $asm_suffix, " {out_tmp}, 0({out})"), + "sltiu {r}, {new_tmp}, 1", + ".set pop", + dst = inout(reg) ptr_reg!(dst) => _, + old = in(reg) ptr_reg!(old), + new = in(reg) ptr_reg!(new), + out = inout(reg) ptr_reg!(out_ptr) => _, + new_tmp = out(reg) _, + old_tmp = out(reg) _, + out_tmp = out(reg) _, + r = lateout(reg) r, + options(nostack), + ) + }; + } + atomic_rmw!(cmpxchg, order); + debug_assert!(r == 0 || r == 1, "r={}", r); + (out, r != 0) + } + } + } + }; +} + +#[rustfmt::skip] +macro_rules! atomic_sub_word { + ($int_type:ident, $asm_suffix:tt, $max:tt) => { + atomic_load_store!($int_type, $asm_suffix, "u"); + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. + // See also partword.rs. + macro_rules! 
swap { + ($acquire:tt, $release:tt) => { + asm!( + ".set push", + ".set noat", + concat!("l", $asm_suffix, "u {tmp}, 0($5)"), + "sllv {mask}, {mask}, {shift}", + "sllv $7, {tmp}, {shift}", + "nor $5, $zero, {mask}", + // (atomic) swap (LL/SC loop) + $release, + "2:", + "ll $8, 0({dst})", + "and $9, $7, {mask}", + "and $10, $8, $5", + "or $10, $10, $9", + "sc $10, 0({dst})", + "beqz $10, 2b", + "and {tmp}, $8, {mask}", + "srlv {tmp}, {tmp}, {shift}", + concat!("se", $asm_suffix, " {tmp}, {tmp}"), + $acquire, + concat!("s", $asm_suffix, " {tmp}, 0({out})"), + ".set pop", + dst = in(reg) ptr_reg!(aligned_ptr), + out = in(reg) ptr_reg!(out_ptr), + shift = in(reg) shift, + mask = inout(reg) mask => _, + tmp = out(reg) _, + inout("$5") ptr_reg!(val) => _, // val => inv_mask + out("$7") _, + out("$8") _, + out("$9") _, + out("$10") _, + options(nostack), + ) + }; + } + atomic_rmw!(swap, order); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let (aligned_ptr, shift, _mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: XSize; + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. + // See also partword.rs. + macro_rules! cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + ".set push", + ".set noat", + concat!("l", $asm_suffix, "u $2, 0($6)"), // new + concat!("l", $asm_suffix, " {tmp}, 0($5)"), // old + concat!("ori $5, $zero, ", $max), + concat!("andi $8, {tmp}, ", $max), + concat!("andi $2, $2, ", $max), + "sllv $5, $5, {shift}", + "sllv $8, $8, {shift}", + "sllv $9, $2, {shift}", + "nor $6, $zero, $5", + // (atomic) CAS (LL/SC loop) + $release, + "2:", + "ll $10, 0({dst})", + "and $11, $10, $5", + "bne $11, $8, 3f", + "and $10, $10, $6", + "or $10, $10, $9", + "sc $10, 0({dst})", + "beqz $10, 2b", + "3:", + "srlv $2, $11, {shift}", + concat!("se", $asm_suffix, " $2, $2"), + $acquire, + "xor {tmp}, $2, {tmp}", + concat!("s", $asm_suffix, " $2, 0({out})"), + "sltiu $2, {tmp}, 1", + ".set pop", + dst = in(reg) ptr_reg!(aligned_ptr), + out = in(reg) ptr_reg!(out_ptr), + shift = in(reg) shift, + tmp = out(reg) _, + out("$2") r, + inout("$5") ptr_reg!(old) => _, // old => mask + inout("$6") ptr_reg!(new) => _, // new => inv_mask + out("$8") _, + out("$9") _, + out("$10") _, + out("$11") _, + options(nostack), + ) + }; + } + atomic_rmw!(cmpxchg, order); + debug_assert!(r == 0 || r == 1, "r={}", r); + (out, r != 0) + } + } + } + }; +} + +atomic_sub_word!(i8, "b", "255"); +atomic_sub_word!(u8, "b", "255"); +atomic_sub_word!(i16, "h", "65535"); +atomic_sub_word!(u16, "h", "65535"); +atomic!(i32, "w", ""); +atomic!(u32, "w", ""); +#[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))] +atomic!(i64, "d", "d"); +#[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))] +atomic!(u64, "d", "d"); +#[cfg(target_pointer_width = "32")] +atomic!(isize, "w", ""); +#[cfg(target_pointer_width = "32")] +atomic!(usize, "w", ""); +#[cfg(target_pointer_width = "64")] +atomic!(isize, "d", "d"); 
+#[cfg(target_pointer_width = "64")] +atomic!(usize, "d", "d"); + +#[macro_export] +macro_rules! cfg_has_atomic_8 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_8 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_16 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_16 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_32 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_32 { + ($($tt:tt)*) => {}; +} +#[cfg(any(target_arch = "mips", target_arch = "mips32r6"))] +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => {}; +} +#[cfg(any(target_arch = "mips", target_arch = "mips32r6"))] +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))] +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))] +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_128 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_no_atomic_128 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_cas { + ($($tt:tt)*) => {}; +} diff --git a/src/arch_legacy/mod.rs b/src/arch_legacy/mod.rs new file mode 100644 index 00000000..ad2b8d32 --- /dev/null +++ b/src/arch_legacy/mod.rs @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This module contains the atomic implementation for older rustc that does not support MaybeUninit registers. +// +// The implementation is based on the code just before we started using MaybeUninit registers. 
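+//
+// The main difference from the code in src/arch is that values are exchanged with the
+// asm blocks through pointers (loaded/stored inside the asm) rather than being passed
+// in registers as MaybeUninit, which the older compilers targeted here reject.
+//
+// A minimal sketch of how this module is expected to be selected, assuming lib.rs keys
+// off the cfg emitted by build.rs (the exact wiring is not part of this file):
+//
+//     #[cfg(not(atomic_maybe_uninit_no_asm_maybe_uninit))]
+//     mod arch;
+//     #[cfg(atomic_maybe_uninit_no_asm_maybe_uninit)]
+//     #[path = "arch_legacy/mod.rs"]
+//     mod arch;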
+ +#![allow(missing_docs)] // For cfg macros + +#[cfg(not(any( + target_arch = "x86", + target_arch = "x86_64", + all( + target_arch = "arm", + any( + target_feature = "v6", + atomic_maybe_uninit_target_feature = "v6", + target_os = "linux", + target_os = "android", + ), + ), + target_arch = "aarch64", + target_arch = "riscv32", + target_arch = "riscv64", + all(target_arch = "loongarch64", not(atomic_maybe_uninit_no_loongarch64_asm)), + all( + any( + target_arch = "avr", + target_arch = "hexagon", + target_arch = "mips", + target_arch = "mips32r6", + target_arch = "mips64", + target_arch = "mips64r6", + target_arch = "msp430", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ), + atomic_maybe_uninit_unstable_asm_experimental_arch, + ), +)))] +#[path = "../arch/unsupported.rs"] +mod unsupported; + +#[cfg(target_arch = "aarch64")] +mod aarch64; +#[cfg(target_arch = "arm")] +#[cfg(all( + any(target_feature = "v6", atomic_maybe_uninit_target_feature = "v6"), + not(any( + target_feature = "v8", + atomic_maybe_uninit_target_feature = "v8", + target_feature = "v8m", + atomic_maybe_uninit_target_feature = "v8m", + )), +))] +mod arm; +#[cfg(target_arch = "arm")] +#[cfg(all( + any(target_os = "linux", target_os = "android"), + not(any(target_feature = "v6", atomic_maybe_uninit_target_feature = "v6")), +))] +mod arm_linux; +#[cfg(target_arch = "arm")] +#[cfg(any( + target_feature = "v8", + atomic_maybe_uninit_target_feature = "v8", + target_feature = "v8m", + atomic_maybe_uninit_target_feature = "v8m", +))] +mod armv8; +#[cfg(target_arch = "avr")] +#[cfg(atomic_maybe_uninit_unstable_asm_experimental_arch)] +#[path = "../arch/avr.rs"] +mod avr; +#[cfg(target_arch = "hexagon")] +#[cfg(atomic_maybe_uninit_unstable_asm_experimental_arch)] +mod hexagon; +#[cfg(target_arch = "loongarch64")] +#[cfg(not(atomic_maybe_uninit_no_loongarch64_asm))] +mod loongarch; +#[cfg(any( + target_arch = "mips", + target_arch = "mips32r6", + target_arch = "mips64", + target_arch = "mips64r6", +))] +#[cfg(atomic_maybe_uninit_unstable_asm_experimental_arch)] +mod mips; +#[cfg(target_arch = "msp430")] +#[cfg(atomic_maybe_uninit_unstable_asm_experimental_arch)] +mod msp430; +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +#[cfg(atomic_maybe_uninit_unstable_asm_experimental_arch)] +mod powerpc; +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +mod riscv; +#[cfg(target_arch = "s390x")] +#[cfg(atomic_maybe_uninit_unstable_asm_experimental_arch)] +mod s390x; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod x86; diff --git a/src/arch_legacy/msp430.rs b/src/arch_legacy/msp430.rs new file mode 100644 index 00000000..0b83f7b8 --- /dev/null +++ b/src/arch_legacy/msp430.rs @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// MSP430 +// +// Refs: https://www.ti.com/lit/ug/slau208q/slau208q.pdf + +use core::{arch::asm, mem::MaybeUninit, sync::atomic::Ordering}; + +use crate::raw::{AtomicLoad, AtomicStore}; + +macro_rules! atomic { + ($int_type:ident, $asm_suffix:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic load is always SeqCst. + asm!( + // TODO: use mem to mem mov? 
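+                    // (MSP430's two-operand MOV also accepts an indirect source with an
+                    // indexed destination, so a single hypothetical
+                    // `mov<.b/.w> @{src}, 0({out})` could avoid the temporary register;
+                    // kept as a TODO because it is untested here.)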
+ // (atomic) load from src to tmp + concat!("mov", $asm_suffix, " @{src}, {tmp}"), + // store tmp to out + concat!("mov", $asm_suffix, " {tmp}, 0({out})"), + src = in(reg) src, + out = inout(reg) out_ptr => _, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ); + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) { + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic store is always SeqCst. + asm!( + // TODO: use mem to mem mov? + // load from val to tmp + concat!("mov", $asm_suffix, " @{val}, {tmp}"), + // (atomic) store tmp to dst + concat!("mov", $asm_suffix, " {tmp}, 0({dst})"), + dst = inout(reg) dst => _, + val = in(reg) val, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ); + } + } + } + }; +} + +atomic!(i8, ".b"); +atomic!(u8, ".b"); +atomic!(i16, ".w"); +atomic!(u16, ".w"); +atomic!(isize, ".w"); +atomic!(usize, ".w"); + +#[macro_export] +macro_rules! cfg_has_atomic_8 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_8 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_16 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_16 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_32 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_no_atomic_32 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_has_atomic_128 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_no_atomic_128 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_no_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} diff --git a/src/arch_legacy/powerpc.rs b/src/arch_legacy/powerpc.rs new file mode 100644 index 00000000..05470de6 --- /dev/null +++ b/src/arch_legacy/powerpc.rs @@ -0,0 +1,870 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// PowerPC and PowerPC64 +// +// Refs: +// - Power ISA https://openpowerfoundation.org/specifications/isa +// - AIX Assembler language reference https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference +// - http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2010.02.19a.html +// - portable-atomic https://github.com/taiki-e/portable-atomic +// +// Generated asm: +// - powerpc https://godbolt.org/z/PME7czo4P +// - powerpc64 https://godbolt.org/z/forK75PK4 +// - powerpc64 (pwr8) https://godbolt.org/z/eGf47W164 +// - powerpc64le https://godbolt.org/z/7f1b8WWd3 +// - powerpc64le (pwr7) https://godbolt.org/z/bKxv6W3Mn + +#[cfg(not(all( + target_arch = "powerpc64", + any( + target_feature = "partword-atomics", + atomic_maybe_uninit_target_feature = "partword-atomics", + ), +)))] +#[path = "../arch/partword.rs"] +mod partword; + +use core::{ + arch::asm, + mem::{self, MaybeUninit}, + sync::atomic::Ordering, +}; + +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; + +#[cfg(target_arch = "powerpc64")] +#[cfg(any( + target_feature = "quadword-atomics", + atomic_maybe_uninit_target_feature = "quadword-atomics", +))] +#[cfg(target_endian = "big")] +macro_rules! 
p128h { + () => { + "0" + }; +} +#[cfg(target_arch = "powerpc64")] +#[cfg(any( + target_feature = "quadword-atomics", + atomic_maybe_uninit_target_feature = "quadword-atomics", +))] +#[cfg(target_endian = "big")] +macro_rules! p128l { + () => { + "8" + }; +} +#[cfg(target_arch = "powerpc64")] +#[cfg(any( + target_feature = "quadword-atomics", + atomic_maybe_uninit_target_feature = "quadword-atomics", +))] +#[cfg(target_endian = "little")] +macro_rules! p128h { + () => { + "8" + }; +} +#[cfg(target_arch = "powerpc64")] +#[cfg(any( + target_feature = "quadword-atomics", + atomic_maybe_uninit_target_feature = "quadword-atomics", +))] +#[cfg(target_endian = "little")] +macro_rules! p128l { + () => { + "0" + }; +} + +macro_rules! atomic_rmw { + ($op:ident, $order:ident) => { + match $order { + Ordering::Relaxed => $op!("", ""), + Ordering::Acquire => $op!("lwsync", ""), + Ordering::Release => $op!("", "lwsync"), + Ordering::AcqRel => $op!("lwsync", "lwsync"), + Ordering::SeqCst => $op!("lwsync", "sync"), + _ => unreachable!("{:?}", $order), + } + }; +} + +#[cfg(target_arch = "powerpc")] +type XSize = u32; +#[cfg(target_arch = "powerpc64")] +type XSize = u64; + +use XSize as Cr; +// Extracts and checks the EQ bit of cr0. +#[inline] +fn extract_cr0(r: Cr) -> bool { + r & 0x20000000 != 0 +} + +#[rustfmt::skip] +macro_rules! atomic_load_store { + ($int_type:ident, $l_suffix:tt, $asm_suffix:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_load { + ($acquire:tt, $release:tt) => { + asm!( + // (atomic) load from src to tmp + $release, + concat!("l", $l_suffix, " {tmp}, 0({src})"), + $acquire, + // store tmp to out + concat!("st", $asm_suffix, " {tmp}, 0({out})"), + src = in(reg_nonzero) ptr_reg!(src), + out = inout(reg_nonzero) ptr_reg!(out_ptr) => _, + tmp = lateout(reg_nonzero) _, + options(nostack, preserves_flags), + ) + }; + } + #[cfg(target_arch = "powerpc64")] + macro_rules! 
atomic_load_acquire { + ($release:tt) => { + asm!( + $release, + // (atomic) load from src to tmp + concat!("l", $l_suffix, " {tmp}, 0({src})"), + // Lightweight acquire sync + // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62 + "cmpd %cr7, {tmp}, {tmp}", + "bne- %cr7, 2f", + "2:", + "isync", + // store tmp to out + concat!("st", $asm_suffix, " {tmp}, 0({out})"), + src = in(reg_nonzero) ptr_reg!(src), + out = inout(reg_nonzero) ptr_reg!(out_ptr) => _, + tmp = lateout(reg_nonzero) _, + out("cr7") _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!("", ""), + #[cfg(target_arch = "powerpc64")] + Ordering::Acquire => atomic_load_acquire!(""), + #[cfg(target_arch = "powerpc64")] + Ordering::SeqCst => atomic_load_acquire!("sync"), + #[cfg(target_arch = "powerpc")] + Ordering::Acquire => atomic_load!("lwsync", ""), + #[cfg(target_arch = "powerpc")] + Ordering::SeqCst => atomic_load!("lwsync", "sync"), + _ => unreachable!("{:?}", order), + } + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_store { + ($release:tt) => { + asm!( + // load from val to tmp + concat!("l", $l_suffix, " {tmp}, 0({val})"), + // (atomic) store tmp to dst + $release, + concat!("st", $asm_suffix, " {tmp}, 0({dst})"), + dst = inout(reg_nonzero) ptr_reg!(dst) => _, + val = in(reg_nonzero) ptr_reg!(val), + tmp = lateout(reg_nonzero) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_store!(""), + Ordering::Release => atomic_store!("lwsync"), + Ordering::SeqCst => atomic_store!("sync"), + _ => unreachable!("{:?}", order), + } + } + } + } + }; +} + +#[rustfmt::skip] +macro_rules! atomic { + ($int_type:ident, $l_suffix:tt, $asm_suffix:tt, $cmp_suffix:tt) => { + atomic_load_store!($int_type, $l_suffix, $asm_suffix); + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! swap { + ($acquire:tt, $release:tt) => { + asm!( + // load from val (ptr) to val (val) + concat!("l", $l_suffix, " {val}, 0({val})"), + // (atomic) swap (LL/SC loop) + $release, + "2:", + // load from dst to tmp + concat!("l", $asm_suffix, "arx {tmp}, 0, {dst}"), + // try to store val to dst + concat!("st", $asm_suffix, "cx. 
{val}, 0, {dst}"), + "bne %cr0, 2b", + $acquire, + // store tmp to out + concat!("st", $asm_suffix, " {tmp}, 0({out})"), + dst = in(reg_nonzero) ptr_reg!(dst), + val = inout(reg_nonzero) ptr_reg!(val) => _, + out = in(reg_nonzero) ptr_reg!(out_ptr), + tmp = out(reg_nonzero) _, + out("cr0") _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(swap, order); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: Cr; + macro_rules! cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + // load from old/new (ptr) to old/new (val) + concat!("l", $l_suffix, " {old}, 0({old})"), + concat!("l", $l_suffix, " {new}, 0({new})"), + // (atomic) CAS (LL/SC loop) + $release, + "2:", + concat!("l", $asm_suffix, "arx {tmp}, 0, {dst}"), + concat!("cmp", $cmp_suffix, " {old}, {tmp}"), + "bne %cr0, 3f", // jump if compare failed + concat!("st", $asm_suffix, "cx. {new}, 0, {dst}"), + "bne %cr0, 2b", // continue loop if store failed + "3:", + // if compare failed EQ bit is cleared, if stqcx succeeds EQ bit is set. + "mfcr {r}", + $acquire, + // store tmp to out + concat!("st", $asm_suffix, " {tmp}, 0({out})"), + dst = in(reg_nonzero) ptr_reg!(dst), + old = inout(reg_nonzero) ptr_reg!(old) => _, + new = inout(reg_nonzero) ptr_reg!(new) => _, + out = inout(reg_nonzero) ptr_reg!(out_ptr) => _, + tmp = out(reg_nonzero) _, + r = lateout(reg_nonzero) r, + out("cr0") _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(cmpxchg, order); + (out, extract_cr0(r)) + } + } + } + }; +} + +#[cfg(not(all( + target_arch = "powerpc64", + any( + target_feature = "partword-atomics", + atomic_maybe_uninit_target_feature = "partword-atomics", + ), +)))] +#[rustfmt::skip] +macro_rules! atomic_sub_word { + ($int_type:ident, $l_suffix:tt, $asm_suffix:tt) => { + atomic_load_store!($int_type, $l_suffix, $asm_suffix); + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. + // See also partword.rs. + macro_rules! swap { + ($acquire:tt, $release:tt) => { + asm!( + concat!("l", $l_suffix, " {val}, 0({val})"), + "slw {mask}, {mask}, {shift}", + "slw {val}, {val}, {shift}", + "and {val}, {val}, {mask}", + // (atomic) swap (LL/SC loop) + $release, + "2:", + "lwarx {tmp1}, 0, {dst}", + "andc {tmp2}, {tmp1}, {mask}", + "or {tmp2}, {val}, {tmp2}", + "stwcx. 
{tmp2}, 0, {dst}", + "bne %cr0, 2b", + "srw {tmp1}, {tmp1}, {shift}", + $acquire, + concat!("st", $asm_suffix, " {tmp1}, 0({out})"), + dst = in(reg_nonzero) ptr_reg!(aligned_ptr), + val = inout(reg_nonzero) ptr_reg!(val) => _, + out = in(reg_nonzero) ptr_reg!(out_ptr), + shift = in(reg_nonzero) shift, + mask = inout(reg_nonzero) mask => _, + tmp1 = out(reg_nonzero) _, + tmp2 = out(reg_nonzero) _, + out("cr0") _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(swap, order); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: Cr; + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. + // See also partword.rs. + macro_rules! cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + concat!("l", $l_suffix, " {old}, 0({old})"), + concat!("l", $l_suffix, " {new}, 0({new})"), + "slw {mask}, {mask}, {shift}", + "slw {old}, {old}, {shift}", + "slw {new}, {new}, {shift}", + "and {old}, {old}, {mask}", + "and {new}, {new}, {mask}", + // (atomic) CAS (LL/SC loop) + $release, + "2:", + "lwarx {tmp2}, 0, {dst}", + "and {tmp1}, {tmp2}, {mask}", + "cmpw {tmp1}, {old}", + "bne %cr0, 3f", + "andc {tmp2}, {tmp2}, {mask}", + "or {tmp2}, {tmp2}, {new}", + "stwcx. {tmp2}, 0, {dst}", + "bne %cr0, 2b", + "3:", + "srw {tmp1}, {tmp1}, {shift}", + // if compare failed EQ bit is cleared, if stqcx succeeds EQ bit is set. 
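+                            // mfcr below copies the whole 32-bit CR image into {r};
+                            // CR0's EQ bit sits at 0x2000_0000 in that image, which is what
+                            // extract_cr0 tests (stwcx. sets it on success, a failed cmpw
+                            // leaves it clear).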
+ "mfcr {r}", + $acquire, + concat!("st", $asm_suffix, " {tmp1}, 0({out})"), + dst = in(reg_nonzero) ptr_reg!(aligned_ptr), + old = inout(reg_nonzero) ptr_reg!(old) => _, + new = inout(reg_nonzero) ptr_reg!(new) => _, + out = inout(reg_nonzero) ptr_reg!(out_ptr) => _, + shift = in(reg_nonzero) shift, + mask = inout(reg_nonzero) mask => _, + r = lateout(reg_nonzero) r, + tmp1 = out(reg_nonzero) _, + tmp2 = out(reg_nonzero) _, + out("cr0") _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(cmpxchg, order); + (out, extract_cr0(r)) + } + } + } + }; +} + +#[cfg(target_arch = "powerpc64")] +#[cfg(any( + target_feature = "partword-atomics", + atomic_maybe_uninit_target_feature = "partword-atomics", +))] +atomic!(i8, "bz", "b", "w"); +#[cfg(target_arch = "powerpc64")] +#[cfg(any( + target_feature = "partword-atomics", + atomic_maybe_uninit_target_feature = "partword-atomics", +))] +atomic!(u8, "bz", "b", "w"); +#[cfg(target_arch = "powerpc64")] +#[cfg(any( + target_feature = "partword-atomics", + atomic_maybe_uninit_target_feature = "partword-atomics", +))] +atomic!(i16, "hz", "h", "w"); +#[cfg(target_arch = "powerpc64")] +#[cfg(any( + target_feature = "partword-atomics", + atomic_maybe_uninit_target_feature = "partword-atomics", +))] +atomic!(u16, "hz", "h", "w"); +#[cfg(not(all( + target_arch = "powerpc64", + any( + target_feature = "partword-atomics", + atomic_maybe_uninit_target_feature = "partword-atomics", + ), +)))] +atomic_sub_word!(i8, "bz", "b"); +#[cfg(not(all( + target_arch = "powerpc64", + any( + target_feature = "partword-atomics", + atomic_maybe_uninit_target_feature = "partword-atomics", + ), +)))] +atomic_sub_word!(u8, "bz", "b"); +#[cfg(not(all( + target_arch = "powerpc64", + any( + target_feature = "partword-atomics", + atomic_maybe_uninit_target_feature = "partword-atomics", + ), +)))] +atomic_sub_word!(i16, "hz", "h"); +#[cfg(not(all( + target_arch = "powerpc64", + any( + target_feature = "partword-atomics", + atomic_maybe_uninit_target_feature = "partword-atomics", + ), +)))] +atomic_sub_word!(u16, "hz", "h"); +atomic!(i32, "wz", "w", "w"); +atomic!(u32, "wz", "w", "w"); +#[cfg(target_arch = "powerpc64")] +atomic!(i64, "d", "d", "d"); +#[cfg(target_arch = "powerpc64")] +atomic!(u64, "d", "d", "d"); +#[cfg(target_pointer_width = "32")] +atomic!(isize, "wz", "w", "w"); +#[cfg(target_pointer_width = "32")] +atomic!(usize, "wz", "w", "w"); +#[cfg(target_pointer_width = "64")] +atomic!(isize, "d", "d", "d"); +#[cfg(target_pointer_width = "64")] +atomic!(usize, "d", "d", "d"); + +// https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445 +// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll +// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128.ll +#[cfg(target_arch = "powerpc64")] +#[cfg(any( + target_feature = "quadword-atomics", + atomic_maybe_uninit_target_feature = "quadword-atomics", +))] +macro_rules! atomic128 { + ($int_type:ident) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! 
atomic_load_acquire { + ($release:tt) => { + asm!( + // (atomic) load from src to out pair + $release, + "lq %r4, 0({src})", + // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62 + "cmpd %cr7, %r4, %r4", + "bne- %cr7, 2f", + "2:", + "isync", + // store out pair to out + concat!("std %r4, ", p128h!(), "({out})"), + concat!("std %r5, ", p128l!(), "({out})"), + src = in(reg_nonzero) ptr_reg!(src), + out = inout(reg_nonzero) ptr_reg!(out_ptr) => _, + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. + out("r4") _, // out (hi) + out("r5") _, // out (lo) + out("cr7") _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => { + asm!( + // (atomic) load from src to out pair + "lq %r4, 0({src})", + // store out pair to out + concat!("std %r4, ", p128h!(), "({out})"), + concat!("std %r5, ", p128l!(), "({out})"), + src = in(reg_nonzero) ptr_reg!(src), + out = inout(reg_nonzero) ptr_reg!(out_ptr) => _, + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. + out("r4") _, // out (hi) + out("r5") _, // out (lo) + options(nostack, preserves_flags), + ) + } + Ordering::Acquire => atomic_load_acquire!(""), + Ordering::SeqCst => atomic_load_acquire!("sync"), + _ => unreachable!("{:?}", order), + } + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_store { + ($release:tt) => { + asm!( + // load from val to val pair + concat!("ld %r4, ", p128h!(), "({val})"), + concat!("ld %r5, ", p128l!(), "({val})"), + // (atomic) store val pair to dst + $release, + "stq %r4, 0({dst})", + dst = inout(reg_nonzero) ptr_reg!(dst) => _, + val = in(reg_nonzero) ptr_reg!(val), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. + out("r4") _, // val (hi) + lateout("r5") _, // val (lo) + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_store!(""), + Ordering::Release => atomic_store!("lwsync"), + Ordering::SeqCst => atomic_store!("sync"), + _ => unreachable!("{:?}", order), + } + } + } + } + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! swap { + ($acquire:tt, $release:tt) => { + asm!( + // load from val to val pair + concat!("ld %r4, ", p128h!(), "({val})"), + concat!("ld %r5, ", p128l!(), "({val})"), + // (atomic) swap (LL/SC loop) + $release, + "2:", + // load from dst to out pair + "lqarx %r6, 0, {dst}", + // try to store val pair to dst + "stqcx. 
%r4, 0, {dst}", + "bne %cr0, 2b", + $acquire, + // store out pair to out + concat!("std %r6, ", p128h!(), "({out})"), + concat!("std %r7, ", p128l!(), "({out})"), + dst = inout(reg_nonzero) ptr_reg!(dst) => _, + val = in(reg_nonzero) ptr_reg!(val), + out = inout(reg_nonzero) ptr_reg!(out_ptr) => _, + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. + out("r4") _, // val (hi) + lateout("r5") _, // val (lo) + out("r6") _, // out (hi) + out("r7") _, // out (lo) + out("cr0") _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(swap, order); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: Cr; + macro_rules! cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + // load from old/new to old/new pairs + concat!("ld %r4, ", p128h!(), "({old})"), + concat!("ld %r5, ", p128l!(), "({old})"), + concat!("ld %r6, ", p128h!(), "({new})"), + concat!("ld %r7, ", p128l!(), "({new})"), + // (atomic) CAS (LL/SC loop) + $release, + "2:", + "lqarx %r8, 0, {dst}", + "xor {tmp_lo}, %r9, %r5", + "xor {tmp_hi}, %r8, %r4", + "or. {tmp_lo}, {tmp_lo}, {tmp_hi}", + "bne %cr0, 3f", // jump if compare failed + "stqcx. %r6, 0, {dst}", + "bne %cr0, 2b", // continue loop if store failed + "3:", + // if compare failed EQ bit is cleared, if stqcx succeeds EQ bit is set. + "mfcr {tmp_lo}", + $acquire, + // store out pair to out + concat!("std %r8, ", p128h!(), "({out})"), + concat!("std %r9, ", p128l!(), "({out})"), + dst = inout(reg_nonzero) ptr_reg!(dst) => _, + old = in(reg_nonzero) ptr_reg!(old), + new = in(reg_nonzero) ptr_reg!(new), + out = inout(reg_nonzero) ptr_reg!(out_ptr) => _, + tmp_hi = out(reg_nonzero) _, + tmp_lo = out(reg_nonzero) r, + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. + out("r4") _, // old (hi) + out("r5") _, // old (lo) + out("r6") _, // new (hi) + lateout("r7") _, // new (lo) + lateout("r8") _, // out (hi) + lateout("r9") _, // out (lo) + out("cr0") _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(cmpxchg, order); + (out, extract_cr0(r)) + } + } + } + }; +} + +#[cfg(target_arch = "powerpc64")] +#[cfg(any( + target_feature = "quadword-atomics", + atomic_maybe_uninit_target_feature = "quadword-atomics", +))] +atomic128!(i128); +#[cfg(target_arch = "powerpc64")] +#[cfg(any( + target_feature = "quadword-atomics", + atomic_maybe_uninit_target_feature = "quadword-atomics", +))] +atomic128!(u128); + +#[macro_export] +macro_rules! cfg_has_atomic_8 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_8 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_16 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_16 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! 
cfg_has_atomic_32 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_32 { + ($($tt:tt)*) => {}; +} +#[cfg(target_arch = "powerpc")] +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => {}; +} +#[cfg(target_arch = "powerpc")] +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(target_arch = "powerpc64")] +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(target_arch = "powerpc64")] +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => {}; +} +#[cfg(not(all( + target_arch = "powerpc64", + any( + target_feature = "quadword-atomics", + atomic_maybe_uninit_target_feature = "quadword-atomics", + ), +)))] +#[macro_export] +macro_rules! cfg_has_atomic_128 { + ($($tt:tt)*) => {}; +} +#[cfg(not(all( + target_arch = "powerpc64", + any( + target_feature = "quadword-atomics", + atomic_maybe_uninit_target_feature = "quadword-atomics", + ), +)))] +#[macro_export] +macro_rules! cfg_no_atomic_128 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(all( + target_arch = "powerpc64", + any( + target_feature = "quadword-atomics", + atomic_maybe_uninit_target_feature = "quadword-atomics", + ), +))] +#[macro_export] +macro_rules! cfg_has_atomic_128 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(all( + target_arch = "powerpc64", + any( + target_feature = "quadword-atomics", + atomic_maybe_uninit_target_feature = "quadword-atomics", + ), +))] +#[macro_export] +macro_rules! cfg_no_atomic_128 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_cas { + ($($tt:tt)*) => {}; +} diff --git a/src/arch_legacy/riscv.rs b/src/arch_legacy/riscv.rs new file mode 100644 index 00000000..d7e6e401 --- /dev/null +++ b/src/arch_legacy/riscv.rs @@ -0,0 +1,474 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// RISC-V +// +// Refs: +// - RISC-V Atomics ABI Specification +// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/HEAD/riscv-atomic.adoc +// - "Mappings from C/C++ primitives to RISC-V primitives." table in RISC-V Instruction Set Manual: +// https://five-embeddev.com/riscv-isa-manual/latest/memory.html#sec:memory:porting +// - RISC-V Instruction Set Specifications https://msyksphinz-self.github.io/riscv-isadoc/html/index.html +// - portable-atomic https://github.com/taiki-e/portable-atomic +// +// Generated asm: +// - riscv64gc https://godbolt.org/z/nW3Po8n4K +// - riscv32imac https://godbolt.org/z/51nPPMYze + +#[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] +#[path = "../arch/partword.rs"] +mod partword; + +use core::{ + arch::asm, + mem::{self, MaybeUninit}, + sync::atomic::Ordering, +}; + +#[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] +use crate::raw::{AtomicCompareExchange, AtomicSwap}; +use crate::raw::{AtomicLoad, AtomicStore}; + +#[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] +#[cfg(target_arch = "riscv32")] +macro_rules! w { + () => { + "" + }; +} +#[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] +#[cfg(target_arch = "riscv64")] +macro_rules! w { + () => { + "w" + }; +} + +#[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] +macro_rules! 
atomic_rmw_amo { + ($op:ident, $order:ident) => { + match $order { + Ordering::Relaxed => $op!(""), + Ordering::Acquire => $op!(".aq"), + Ordering::Release => $op!(".rl"), + // AcqRel and SeqCst RMWs are equivalent. + Ordering::AcqRel | Ordering::SeqCst => $op!(".aqrl"), + _ => unreachable!("{:?}", $order), + } + }; +} +#[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] +macro_rules! atomic_rmw_lr_sc { + ($op:ident, $order:ident) => { + match $order { + Ordering::Relaxed => $op!("", ""), + Ordering::Acquire => $op!(".aq", ""), + Ordering::Release => $op!("", ".rl"), + Ordering::AcqRel => $op!(".aq", ".rl"), + Ordering::SeqCst => $op!(".aqrl", ".rl"), + _ => unreachable!("{:?}", $order), + } + }; +} + +#[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] +#[cfg(target_arch = "riscv32")] +type XSize = u32; +#[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] +#[cfg(target_arch = "riscv64")] +type XSize = u64; + +#[rustfmt::skip] +macro_rules! atomic_load_store { + ($int_type:ident, $asm_suffix:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_load { + ($acquire:tt, $release:tt) => { + asm!( + // (atomic) load from src to tmp + $release, + concat!("l", $asm_suffix, " {tmp}, 0({src})"), + $acquire, + // store tmp to out + concat!("s", $asm_suffix, " {tmp}, 0({out})"), + src = in(reg) ptr_reg!(src), + out = inout(reg) ptr_reg!(out_ptr) => _, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!("", ""), + Ordering::Acquire => atomic_load!("fence r, rw", ""), + Ordering::SeqCst => atomic_load!("fence r, rw", "fence rw, rw"), + _ => unreachable!("{:?}", order), + } + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_store { + ($release:tt) => { + asm!( + // load from val to tmp + concat!("l", $asm_suffix, " {tmp}, 0({val})"), + // (atomic) store tmp to dst + $release, + concat!("s", $asm_suffix, " {tmp}, 0({dst})"), + dst = inout(reg) ptr_reg!(dst) => _, + val = in(reg) ptr_reg!(val), + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_store!(""), + // Release and SeqCst stores are equivalent. + Ordering::Release | Ordering::SeqCst => atomic_store!("fence rw, w"), + _ => unreachable!("{:?}", order), + } + } + } + } + }; +} + +macro_rules! 
atomic { + ($int_type:ident, $asm_suffix:tt) => { + atomic_load_store!($int_type, $asm_suffix); + #[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! swap { + ($order:tt) => { + asm!( + // load from val (ptr) to val (val) + concat!("l", $asm_suffix, " {val}, 0({val})"), + // (atomic) swap (AMO) + // - load value from dst and store it to tmp + // - store value of val to dst + concat!("amoswap.", $asm_suffix, $order, " {tmp}, {val}, 0({dst})"), + // store tmp to out + concat!("s", $asm_suffix, " {tmp}, 0({out})"), + dst = in(reg) ptr_reg!(dst), + val = inout(reg) ptr_reg!(val) => _, + out = inout(reg) ptr_reg!(out_ptr) => _, + tmp = lateout(reg) _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw_amo!(swap, order); + } + out + } + } + #[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: XSize; + macro_rules! cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + // load from old/new (ptr) to old/new (val) + concat!("l", $asm_suffix, " {old}, 0({old})"), + concat!("l", $asm_suffix, " {new}, 0({new})"), + // (atomic) CAS (LR/SC loop) + "2:", + concat!("lr.", $asm_suffix, $acquire, " {tmp}, 0({dst})"), + "bne {tmp}, {old}, 3f", // compare and jump if compare failed + concat!("sc.", $asm_suffix, $release, " {r}, {new}, 0({dst})"), + "bnez {r}, 2b", // continue loop if store failed + "3:", + "xor {r}, {tmp}, {old}", + "seqz {r}, {r}", + // store tmp to out + concat!("s", $asm_suffix, " {tmp}, 0({out})"), + dst = in(reg) ptr_reg!(dst), + old = inout(reg) ptr_reg!(old) => _, + new = inout(reg) ptr_reg!(new) => _, + out = in(reg) ptr_reg!(out_ptr), + tmp = out(reg) _, + r = out(reg) r, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw_lr_sc!(cmpxchg, order); + debug_assert!(r == 0 || r == 1, "r={}", r); + (out, r != 0) + } + } + } + }; +} + +#[rustfmt::skip] +macro_rules! atomic_sub_word { + ($int_type:ident, $asm_suffix:tt) => { + atomic_load_store!($int_type, $asm_suffix); + #[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. 
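+ // Without the Zabha extension, the A extension only provides 32-/64-bit AMOs, so the 8-/16-bit operations below are emulated with an LR/SC loop on the containing aligned word.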
+ unsafe { + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. + // See also partword.rs. + macro_rules! swap { + ($acquire:tt, $release:tt) => { + asm!( + concat!("l", $asm_suffix, "u {val}, 0({val})"), + concat!("sll", w!(), " {mask}, {mask}, {shift}"), + concat!("sll", w!(), " {val}, {val}, {shift}"), + // (atomic) swap (LR/SC loop) + "2:", + concat!("lr.w", $acquire, " {tmp1}, 0({dst})"), + "mv {tmp2}, {val}", + "xor {tmp2}, {tmp2}, {tmp1}", + "and {tmp2}, {tmp2}, {mask}", + "xor {tmp2}, {tmp2}, {tmp1}", + concat!("sc.w", $release, " {tmp2}, {tmp2}, 0({dst})"), + "bnez {tmp2}, 2b", + concat!("srl", w!(), " {tmp1}, {tmp1}, {shift}"), + concat!("s", $asm_suffix, " {tmp1}, 0({out})"), + dst = in(reg) ptr_reg!(aligned_ptr), + val = inout(reg) ptr_reg!(val) => _, + out = in(reg) ptr_reg!(out_ptr), + shift = in(reg) shift, + mask = inout(reg) mask => _, + tmp1 = out(reg) _, + tmp2 = out(reg) _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw_lr_sc!(swap, order); + } + out + } + } + #[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + let (aligned_ptr, shift, mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: XSize; + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. + // See also partword.rs. + macro_rules! 
cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + concat!("l", $asm_suffix, "u {old}, 0({old})"), + concat!("l", $asm_suffix, "u {new}, 0({new})"), + concat!("sll", w!(), " {mask}, {mask}, {shift}"), + concat!("sll", w!(), " {old}, {old}, {shift}"), + concat!("sll", w!(), " {new}, {new}, {shift}"), + // (atomic) CAS (LR/SC loop) + "2:", + concat!("lr.w", $acquire, " {tmp1}, 0({dst})"), + "and {tmp2}, {tmp1}, {mask}", + "bne {tmp2}, {old}, 3f", + "xor {tmp2}, {tmp1}, {new}", + "and {tmp2}, {tmp2}, {mask}", + "xor {tmp2}, {tmp2}, {tmp1}", + concat!("sc.w", $release, " {tmp2}, {tmp2}, 0({dst})"), + "bnez {tmp2}, 2b", + "3:", + concat!("srl", w!(), " {tmp2}, {tmp1}, {shift}"), + "and {tmp1}, {tmp1}, {mask}", + "xor {r}, {old}, {tmp1}", + "seqz {r}, {r}", + concat!("s", $asm_suffix, " {tmp2}, 0({out})"), + dst = in(reg) ptr_reg!(aligned_ptr), + old = inout(reg) ptr_reg!(old) => _, + new = inout(reg) ptr_reg!(new) => _, + out = inout(reg) ptr_reg!(out_ptr) => _, + shift = in(reg) shift, + mask = inout(reg) mask => _, + r = lateout(reg) r, + tmp1 = out(reg) _, + tmp2 = out(reg) _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw_lr_sc!(cmpxchg, order); + debug_assert!(r == 0 || r == 1, "r={}", r); + (out, r != 0) + } + } + } + }; +} + +atomic_sub_word!(i8, "b"); +atomic_sub_word!(u8, "b"); +atomic_sub_word!(i16, "h"); +atomic_sub_word!(u16, "h"); +atomic!(i32, "w"); +atomic!(u32, "w"); +#[cfg(target_arch = "riscv64")] +atomic!(i64, "d"); +#[cfg(target_arch = "riscv64")] +atomic!(u64, "d"); +#[cfg(target_pointer_width = "32")] +atomic!(isize, "w"); +#[cfg(target_pointer_width = "32")] +atomic!(usize, "w"); +#[cfg(target_pointer_width = "64")] +atomic!(isize, "d"); +#[cfg(target_pointer_width = "64")] +atomic!(usize, "d"); + +#[macro_export] +macro_rules! cfg_has_atomic_8 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_8 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_16 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_16 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_32 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_32 { + ($($tt:tt)*) => {}; +} +#[cfg(target_arch = "riscv32")] +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => {}; +} +#[cfg(target_arch = "riscv32")] +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(target_arch = "riscv64")] +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(target_arch = "riscv64")] +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_128 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_no_atomic_128 { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] +#[macro_export] +macro_rules! cfg_no_atomic_cas { + ($($tt:tt)*) => {}; +} +#[cfg(not(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a")))] +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => {}; +} +#[cfg(not(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a")))] +#[macro_export] +macro_rules! 
cfg_no_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} diff --git a/src/arch_legacy/s390x.rs b/src/arch_legacy/s390x.rs new file mode 100644 index 00000000..7ad59982 --- /dev/null +++ b/src/arch_legacy/s390x.rs @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// s390x +// +// Refs: +// - z/Architecture Principles of Operation https://publibfp.dhe.ibm.com/epubs/pdf/a227832d.pdf +// - z/Architecture Reference Summary https://www.ibm.com/support/pages/zarchitecture-reference-summary +// - portable-atomic https://github.com/taiki-e/portable-atomic +// +// Generated asm: +// - s390x https://godbolt.org/z/qv8s6o13G +// - s390x (z196) https://godbolt.org/z/jW67E4YEq + +#[path = "../arch/partword.rs"] +mod partword; + +use core::{ + arch::asm, + mem::{self, MaybeUninit}, + sync::atomic::Ordering, +}; + +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; + +type XSize = u64; + +// Extracts and checks condition code. +#[inline] +fn extract_cc(r: i64) -> bool { + let r = r.wrapping_add(-268435456) & (1 << 31); + debug_assert!(r == 0 || r == 2147483648, "r={r}"); + r != 0 +} + +#[inline] +fn complement(v: u32) -> u32 { + (v ^ !0).wrapping_add(1) +} + +macro_rules! atomic_load_store { + ($int_type:ident, $l_suffix:tt, $asm_suffix:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic load is always SeqCst. + asm!( + // (atomic) load from src to r0 + concat!("l", $l_suffix, " %r0, 0({src})"), + // store r0 to out + concat!("st", $asm_suffix, " %r0, 0({out})"), + src = in(reg) ptr_reg!(src), + out = in(reg) ptr_reg!(out_ptr), + out("r0") _, + options(nostack, preserves_flags), + ); + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_store { + ($fence:tt) => { + asm!( + // load from val to r0 + concat!("l", $l_suffix, " %r0, 0({val})"), + // (atomic) store r0 to dst + concat!("st", $asm_suffix, " %r0, 0({dst})"), + $fence, + dst = in(reg) ptr_reg!(dst), + val = in(reg) ptr_reg!(val), + out("r0") _, + options(nostack, preserves_flags), + ) + }; + } + match order { + // Relaxed and Release stores are equivalent. + Ordering::Relaxed | Ordering::Release => atomic_store!(""), + // bcr 14,0 (fast-BCR-serialization) requires z196 or later. + #[cfg(any( + target_feature = "fast-serialization", + atomic_maybe_uninit_target_feature = "fast-serialization", + ))] + Ordering::SeqCst => atomic_store!("bcr 14, 0"), + #[cfg(not(any( + target_feature = "fast-serialization", + atomic_maybe_uninit_target_feature = "fast-serialization", + )))] + Ordering::SeqCst => atomic_store!("bcr 15, 0"), + _ => unreachable!("{:?}", order), + } + } + } + } + }; +} + +macro_rules! 
atomic { + ($int_type:ident, $asm_suffix:tt) => { + atomic_load_store!($int_type, $asm_suffix, $asm_suffix); + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic swap is always SeqCst. + asm!( + // load from val to val_tmp + concat!("l", $asm_suffix, " {val_tmp}, 0({val})"), + // (atomic) swap (CAS loop) + concat!("l", $asm_suffix, " %r0, 0({dst})"), + "2:", + concat!("cs", $asm_suffix, " %r0, {val_tmp}, 0({dst})"), + "jl 2b", + // store r0 to out + concat!("st", $asm_suffix, " %r0, 0({out})"), + dst = in(reg) ptr_reg!(dst), + val = in(reg) ptr_reg!(val), + val_tmp = out(reg) _, + out = in(reg) ptr_reg!(out_ptr), + out("r0") _, + // Do not use `preserves_flags` because CS modifies the condition code. + options(nostack), + ); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i64; + // compare_exchange is always SeqCst. + asm!( + // load from old/new to r0/tmp + concat!("l", $asm_suffix, " %r0, 0({old})"), + concat!("l", $asm_suffix, " {tmp}, 0({new})"), + // (atomic) CAS + concat!("cs", $asm_suffix, " %r0, {tmp}, 0({dst})"), + // store condition code + "ipm {tmp}", + // store r0 to out + concat!("st", $asm_suffix, " %r0, 0({out})"), + dst = in(reg) ptr_reg!(dst), + old = in(reg) ptr_reg!(old), + new = in(reg) ptr_reg!(new), + tmp = out(reg) r, + out = in(reg) ptr_reg!(out_ptr), + out("r0") _, + // Do not use `preserves_flags` because CS modifies the condition code. + options(nostack), + ); + (out, extract_cc(r)) + } + } + } + }; +} + +macro_rules! atomic_sub_word { + ($int_type:ident, $l_suffix:tt, $asm_suffix:tt, $bits:tt, $risbg_swap:tt, $risbg_cas:tt) => { + atomic_load_store!($int_type, $l_suffix, $asm_suffix); + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let (aligned_ptr, shift, _mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // Implement sub-word atomic operations using word-sized CAS loop. + // Based on assemblies generated by rustc/LLVM. + // See also partword.rs. 
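+ // RLL rotates the containing aligned word so the target sub-word lands at a fixed position, RISBG inserts the new bits there, and CS retries the word-sized CAS until it succeeds.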
+ asm!( + concat!("l", $l_suffix, " %r0, 0(%r3)"), + "l %r3, 0({dst})", + "2:", + "rll %r14, %r3, 0({shift})", + concat!("risbg %r14, %r0, 32, ", $risbg_swap), + "rll %r14, %r14, 0({shift_c})", + "cs %r3, %r14, 0({dst})", + "jl 2b", + concat!("rll %r0, %r3, ", $bits ,"({shift})"), + concat!("st", $asm_suffix, " %r0, 0({out})"), + dst = in(reg) ptr_reg!(aligned_ptr), + out = in(reg) ptr_reg!(out_ptr), + shift = in(reg) shift as u32, + shift_c = in(reg) complement(shift as u32), + out("r0") _, + inout("r3") ptr_reg!(val) => _, + out("r14") _, + // Do not use `preserves_flags` because CS modifies the condition code. + options(nostack), + ); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let (aligned_ptr, shift, _mask) = partword::create_mask_values(dst); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i64; + // Implement sub-word atomic operations using word-sized CAS loop. + // Based on assemblies generated by rustc/LLVM. + // See also partword.rs. + asm!( + concat!("ll", $asm_suffix, " %r0, 0(%r3)"), + concat!("l", $l_suffix, " %r1, 0(%r4)"), + "l %r4, 0({dst})", + "2:", + concat!("rll %r13, %r4, ", $bits ,"({shift})"), + concat!("risbg %r1, %r13, 32, ", $risbg_cas, ", 0"), + concat!("ll", $asm_suffix, "r %r13, %r13"), + "cr %r13, %r0", + "jlh 3f", + concat!("rll %r3, %r1, -", $bits ,"({shift_c})"), + "cs %r4, %r3, 0({dst})", + "jl 2b", + "3:", + // store condition code + "ipm %r0", + concat!("st", $asm_suffix, " %r13, 0({out})"), + dst = in(reg) ptr_reg!(aligned_ptr), + out = in(reg) ptr_reg!(out_ptr), + shift = in(reg) shift as u32, + shift_c = in(reg) complement(shift as u32), + out("r0") r, + out("r1") _, + inout("r3") ptr_reg!(old) => _, + inout("r4") ptr_reg!(new) => _, + out("r13") _, + // Do not use `preserves_flags` because CS modifies the condition code. + options(nostack), + ); + (out, extract_cc(r)) + } + } + } + }; +} + +atomic_sub_word!(i8, "b", "c", "8", "39, 24", "55"); +atomic_sub_word!(u8, "b", "c", "8", "39, 24", "55"); +atomic_sub_word!(i16, "h", "h", "16", "47, 16", "47"); +atomic_sub_word!(u16, "h", "h", "16", "47, 16", "47"); +atomic!(i32, ""); +atomic!(u32, ""); +atomic!(i64, "g"); +atomic!(u64, "g"); +atomic!(isize, "g"); +atomic!(usize, "g"); + +// https://github.com/llvm/llvm-project/commit/a11f63a952664f700f076fd754476a2b9eb158cc +macro_rules! atomic128 { + ($int_type:ident) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic load is always SeqCst. + asm!( + // (atomic) load from src to out pair + "lpq %r0, 0({src})", + // store out pair to out + "stg %r1, 8({out})", + "stg %r0, 0({out})", + src = in(reg) ptr_reg!(src), + out = in(reg) ptr_reg!(out_ptr), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. 
+ out("r0") _, // out (hi) + out("r1") _, // out (lo) + options(nostack, preserves_flags), + ); + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_store { + ($fence:tt) => { + asm!( + // load from val to val pair + "lg %r1, 8({val})", + "lg %r0, 0({val})", + // (atomic) store val pair to dst + "stpq %r0, 0({dst})", + $fence, + dst = in(reg) ptr_reg!(dst), + val = in(reg) ptr_reg!(val), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + out("r0") _, // val (hi) + out("r1") _, // val (lo) + options(nostack, preserves_flags), + ) + }; + } + match order { + // Relaxed and Release stores are equivalent. + Ordering::Relaxed | Ordering::Release => atomic_store!(""), + // bcr 14,0 (fast-BCR-serialization) requires z196 or later. + #[cfg(any( + target_feature = "fast-serialization", + atomic_maybe_uninit_target_feature = "fast-serialization", + ))] + Ordering::SeqCst => atomic_store!("bcr 14, 0"), + #[cfg(not(any( + target_feature = "fast-serialization", + atomic_maybe_uninit_target_feature = "fast-serialization", + )))] + Ordering::SeqCst => atomic_store!("bcr 15, 0"), + _ => unreachable!("{:?}", order), + } + } + } + } + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic swap is always SeqCst. + asm!( + // load from val to val pair + "lg %r1, 8({val})", + "lg %r0, 0({val})", + // (atomic) swap (CAS loop) + "lpq %r2, 0({dst})", + "2:", + "cdsg %r2, %r0, 0({dst})", + "jl 2b", + // store out pair to out + "stg %r3, 8({out})", + "stg %r2, 0({out})", + dst = inout(reg) ptr_reg!(dst) => _, + val = in(reg) ptr_reg!(val), + out = inout(reg) ptr_reg!(out_ptr) => _, + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + out("r0") _, // val (hi) + out("r1") _, // val (lo) + lateout("r2") _, // out (hi) + lateout("r3") _, // out (lo) + // Do not use `preserves_flags` because CDSG modifies the condition code. + options(nostack), + ); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: i64; + // compare_exchange is always SeqCst. 
+ asm!( + // load from old/new to old/new pairs + "lg %r1, 8({old})", + "lg %r0, 0({old})", + "lg %r13, 8({new})", + "lg %r12, 0({new})", + // (atomic) CAS + "cdsg %r0, %r12, 0({dst})", + // store condition code + "ipm {r}", + // store out pair to out + "stg %r1, 8({out})", + "stg %r0, 0({out})", + dst = in(reg) ptr_reg!(dst), + old = in(reg) ptr_reg!(old), + new = in(reg) ptr_reg!(new), + out = inout(reg) ptr_reg!(out_ptr) => _, + r = lateout(reg) r, + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + out("r0") _, // old (hi) -> out (hi) + out("r1") _, // old (lo) -> out (lo) + out("r12") _, // new (hi) + out("r13") _, // new (hi) + // Do not use `preserves_flags` because CDSG modifies the condition code. + options(nostack), + ); + (out, extract_cc(r)) + } + } + } + }; +} + +atomic128!(i128); +atomic128!(u128); + +#[macro_export] +macro_rules! cfg_has_atomic_8 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_8 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_16 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_16 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_32 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_32 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_64 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_64 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_128 { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_128 { + ($($tt:tt)*) => {}; +} +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} +#[macro_export] +macro_rules! cfg_no_atomic_cas { + ($($tt:tt)*) => {}; +} diff --git a/src/arch_legacy/x86.rs b/src/arch_legacy/x86.rs new file mode 100644 index 00000000..d043fcb5 --- /dev/null +++ b/src/arch_legacy/x86.rs @@ -0,0 +1,840 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// x86 and x86_64 +// +// Refs: +// - x86 and amd64 instruction reference https://www.felixcloutier.com/x86 +// - portable-atomic https://github.com/taiki-e/portable-atomic +// +// Generated asm: +// - x86_64 https://godbolt.org/z/fvqWGT5E6 +// - x86_64 (+cmpxchg16b) https://godbolt.org/z/fGdj8naT9 +// - x86 (i686) https://godbolt.org/z/9jKcboaoG +// - x86 (i686,-sse2) https://godbolt.org/z/sjYK57r96 +// - x86 (i586) https://godbolt.org/z/5rrzYGxPe +// - x86 (i586,-x87) https://godbolt.org/z/GvcdhqxYo +// - x86 (i486) https://godbolt.org/z/nPaGY4oEM +// - x86 (i386) https://godbolt.org/z/YWEc63Kac + +use core::{ + arch::asm, + mem::{self, MaybeUninit}, + sync::atomic::Ordering, +}; + +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; + +#[cfg(target_pointer_width = "32")] +macro_rules! ptr_modifier { + () => { + ":e" + }; +} +#[cfg(target_pointer_width = "64")] +macro_rules! ptr_modifier { + () => { + "" + }; +} + +#[cfg(target_arch = "x86")] +#[cfg(not(atomic_maybe_uninit_no_cmpxchg8b))] +#[cfg(target_feature = "sse")] +#[cfg(target_feature = "sse2")] +macro_rules! if_sse2 { + ($then:expr, $else:expr) => { + $then + }; +} +#[cfg(target_arch = "x86")] +#[cfg(not(atomic_maybe_uninit_no_cmpxchg8b))] +#[cfg(target_feature = "sse")] +#[cfg(not(target_feature = "sse2"))] +macro_rules! if_sse2 { + ($then:expr, $else:expr) => { + $else + }; +} + +macro_rules! 
atomic { + ($int_type:ident, $val_reg:tt, $val_modifier:tt, $ptr_size:tt, $cmpxchg_cmp_reg:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic load is always SeqCst. + asm!( + // (atomic) load from src to tmp + concat!("mov {tmp", $val_modifier, "}, ", $ptr_size, " ptr [{src", ptr_modifier!(), "}]"), + // store tmp to out + concat!("mov ", $ptr_size, " ptr [{out", ptr_modifier!(), "}], {tmp", $val_modifier, "}"), + src = in(reg) src, + out = inout(reg) out_ptr => _, + tmp = lateout($val_reg) _, + options(nostack, preserves_flags), + ); + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + match order { + // Relaxed and Release stores are equivalent. + Ordering::Relaxed | Ordering::Release => { + asm!( + // load from val to tmp + concat!("mov {tmp", $val_modifier, "}, ", $ptr_size, " ptr [{val", ptr_modifier!(), "}]"), + // (atomic) store tmp to dst + concat!("mov ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {tmp", $val_modifier, "}"), + dst = inout(reg) dst => _, + val = in(reg) val, + tmp = lateout($val_reg) _, + options(nostack, preserves_flags), + ); + } + Ordering::SeqCst => { + asm!( + // load from val to tmp + concat!("mov {tmp", $val_modifier, "}, ", $ptr_size, " ptr [{val", ptr_modifier!(), "}]"), + // (atomic) store tmp to dst (SeqCst store is xchg, not mov) + concat!("xchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {tmp", $val_modifier, "}"), + dst = inout(reg) dst => _, + val = in(reg) val, + tmp = lateout($val_reg) _, + options(nostack, preserves_flags), + ); + } + _ => unreachable!("{:?}", order), + } + } + } + } + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic swap is always SeqCst. 
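+ // XCHG with a memory operand is implicitly locked, so it acts as a full barrier and needs no LOCK prefix or fence.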
+ asm!( + // load from val to tmp + concat!("mov {tmp", $val_modifier, "}, ", $ptr_size, " ptr [{val", ptr_modifier!(), "}]"), + // (atomic) swap tmp and dst + concat!("xchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {tmp", $val_modifier, "}"), + // store tmp to out + concat!("mov ", $ptr_size, " ptr [{out", ptr_modifier!(), "}], {tmp", $val_modifier, "}"), + dst = inout(reg) dst => _, + val = in(reg) val, + out = inout(reg) out_ptr => _, + tmp = lateout($val_reg) _, + options(nostack, preserves_flags), + ); + } + out + } + } + #[cfg(not(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg)))] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + // + // Refs: https://www.felixcloutier.com/x86/cmpxchg + unsafe { + let r: u8; + // compare_exchange is always SeqCst. + asm!( + // load from old/new to $cmpxchg_cmp_reg/tmp_new + concat!("mov ", $cmpxchg_cmp_reg, ", ", $ptr_size, " ptr [{old", ptr_modifier!(), "}]"), + concat!("mov {tmp_new", $val_modifier, "}, ", $ptr_size, " ptr [{new", ptr_modifier!(), "}]"), + // (atomic) CAS + // - Compare $cmpxchg_cmp_reg with dst. + // - If equal, ZF is set and tmp_new is loaded into dst. + // - Else, clear ZF and load dst into $cmpxchg_cmp_reg. + concat!("lock cmpxchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {tmp_new", $val_modifier, "}"), + // load ZF to r + "sete {r}", + // store $cmpxchg_cmp_reg to out + concat!("mov ", $ptr_size, " ptr [{out", ptr_modifier!(), "}], ", $cmpxchg_cmp_reg, ""), + dst = in(reg) dst, + old = in(reg) old, + new = in(reg) new, + out = in(reg) out_ptr, + tmp_new = out($val_reg) _, + r = out(reg_byte) r, + out($cmpxchg_cmp_reg) _, + // Do not use `preserves_flags` because CMPXCHG modifies the ZF flag. + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + (out, r != 0) + } + } + } + }; +} + +atomic!(i8, reg_byte, "", "byte", "al"); +atomic!(u8, reg_byte, "", "byte", "al"); +atomic!(i16, reg, ":x", "word", "ax"); +atomic!(u16, reg, ":x", "word", "ax"); +atomic!(i32, reg, ":e", "dword", "eax"); +atomic!(u32, reg, ":e", "dword", "eax"); +#[cfg(target_arch = "x86_64")] +atomic!(i64, reg, "", "qword", "rax"); +#[cfg(target_arch = "x86_64")] +atomic!(u64, reg, "", "qword", "rax"); +#[cfg(target_pointer_width = "32")] +atomic!(isize, reg, ":e", "dword", "eax"); +#[cfg(target_pointer_width = "32")] +atomic!(usize, reg, ":e", "dword", "eax"); +#[cfg(target_pointer_width = "64")] +atomic!(isize, reg, "", "qword", "rax"); +#[cfg(target_pointer_width = "64")] +atomic!(usize, reg, "", "qword", "rax"); + +// For load/store, we can use MOVQ(SSE2)/MOVLPS(SSE) instead of CMPXCHG8B. +// Refs: https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/X86/atomic-load-store-wide.ll +#[cfg(target_arch = "x86")] +#[cfg(not(atomic_maybe_uninit_no_cmpxchg8b))] +macro_rules! 
atomic64 { + ($int_type:ident) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + #[cfg(target_feature = "sse")] + // SAFETY: the caller must uphold the safety contract. + // cfg guarantees that the CPU supports SSE. + unsafe { + #[cfg(target_feature = "sse2")] + { + // atomic load is always SeqCst. + asm!( + // Refs: + // - https://www.felixcloutier.com/x86/movq (SSE2) + // - https://www.felixcloutier.com/x86/movd:movq (SSE2) + // - https://www.felixcloutier.com/x86/pshufd (SSE2) + // (atomic) load from src to tmp0 + "movq {tmp0}, qword ptr [{src}]", + // extract lower 64-bits + "pshufd {tmp1}, {tmp0}, 85", + // store tmp0/tmp1 to out + "movd dword ptr [{out}], {tmp0}", + "movd dword ptr [{out} + 4], {tmp1}", + src = in(reg) src, + out = in(reg) out_ptr, + tmp0 = out(xmm_reg) _, + tmp1 = out(xmm_reg) _, + options(nostack, preserves_flags), + ); + } + #[cfg(not(target_feature = "sse2"))] + { + // atomic load is always SeqCst. + asm!( + // Refs: + // - https://www.felixcloutier.com/x86/xorps (SSE) + // - https://www.felixcloutier.com/x86/movlps (SSE) + // - https://www.felixcloutier.com/x86/movss (SSE) + // - https://www.felixcloutier.com/x86/shufps (SSE) + "xorps {tmp}, {tmp}", + // (atomic) load from src to tmp + "movlps {tmp}, qword ptr [{src}]", + // store tmp to out + "movss dword ptr [{out}], {tmp}", + "shufps {tmp}, {tmp}, 85", + "movss dword ptr [{out} + 4], {tmp}", + src = in(reg) src, + out = in(reg) out_ptr, + tmp = out(xmm_reg) _, + options(nostack, preserves_flags), + ); + } + } + #[cfg(not(target_feature = "sse"))] + // SAFETY: the caller must uphold the safety contract. + // + // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b + unsafe { + // atomic load is always SeqCst. + asm!( + // esi is reserved by LLVM + "xchg {esi_tmp}, esi", + // (atomic) load by cmpxchg(0, 0) + "lock cmpxchg8b qword ptr [edi]", + // store current value to out + "mov dword ptr [esi], eax", + "mov dword ptr [esi + 4], edx", + "mov esi, {esi_tmp}", // restore esi + esi_tmp = inout(reg) out_ptr => _, + // set old/new args of cmpxchg8b to 0 + inout("eax") 0_u32 => _, + inout("edx") 0_u32 => _, + in("ebx") 0_u32, + in("ecx") 0_u32, + in("edi") src, + // Do not use `preserves_flags` because CMPXCHG8B modifies the ZF flag. + options(nostack), + ); + } + out + } + } + impl AtomicStore for $int_type { + #[inline] + unsafe fn atomic_store( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let val = val.as_ptr(); + + #[cfg(target_feature = "sse")] + // SAFETY: the caller must uphold the safety contract. + // cfg guarantees that the CPU supports SSE. + // + // Refs: + // - https://www.felixcloutier.com/x86/movlps (SSE) + // - https://www.felixcloutier.com/x86/xorps (SSE) + // - https://www.felixcloutier.com/x86/movsd (SSE2) + // - https://www.felixcloutier.com/x86/lock + // - https://www.felixcloutier.com/x86/or + unsafe { + match order { + // Relaxed and Release stores are equivalent. 
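+ // On x86 a plain store already has release semantics (stores are not reordered with earlier memory operations), so only SeqCst needs the locked OR used below as an mfence substitute.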
+ Ordering::Relaxed | Ordering::Release => { + asm!( + if_sse2!("", "xorps {tmp}, {tmp}"), + // load from val to tmp + if_sse2!("movsd {tmp}, qword ptr [{val}]", "movlps {tmp}, qword ptr [{val}]"), + // (atomic) store tmp to dst + "movlps qword ptr [{dst}], {tmp}", + dst = in(reg) dst, + val = in(reg) val, + tmp = out(xmm_reg) _, + options(nostack, preserves_flags), + ); + } + Ordering::SeqCst => { + let p = core::cell::UnsafeCell::new(0_u32); + asm!( + // load from val to tmp + if_sse2!("", "xorps {tmp}, {tmp}"), + if_sse2!("movsd {tmp}, qword ptr [{val}]", "movlps {tmp}, qword ptr [{val}]"), + // (atomic) store tmp to dst + "movlps qword ptr [{dst}], {tmp}", + "lock or dword ptr [{p}], 0", // equivalent to mfence, but doesn't require SSE2 + dst = in(reg) dst, + val = in(reg) val, + tmp = out(xmm_reg) _, + p = in(reg) p.get(), + // Do not use `preserves_flags` because OR modifies the OF, CF, SF, ZF, and PF flags. + options(nostack), + ); + } + _ => unreachable!("{:?}", order), + } + } + #[cfg(not(target_feature = "sse"))] + // SAFETY: the caller must uphold the safety contract. + // + // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b + unsafe { + // atomic store is always SeqCst. + let _ = order; + asm!( + "mov ebx, dword ptr [eax]", + "mov ecx, dword ptr [eax + 4]", + // This is based on the code generated for the first load in DW RMWs by LLVM, + // but it is interesting that they generate code that does mixed-sized atomic access. + // + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + "mov eax, dword ptr [edi]", + "mov edx, dword ptr [edi + 4]", + // (atomic) store (CAS loop) + "2:", + "lock cmpxchg8b qword ptr [edi]", + "jne 2b", + inout("eax") val => _, + out("edx") _, + out("ebx") _, + out("ecx") _, + in("edi") dst, + // Do not use `preserves_flags` because CMPXCHG8B modifies the ZF flag. + options(nostack), + ); + } + } + } + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let val = val.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + // + // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b + unsafe { + // atomic store is always SeqCst. + asm!( + // esi is reserved by LLVM + "xchg {esi_tmp}, esi", + "mov ebx, dword ptr [eax]", + "mov ecx, dword ptr [eax + 4]", + // This is based on the code generated for the first load in DW RMWs by LLVM, + // but it is interesting that they generate code that does mixed-sized atomic access. + // + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + "mov eax, dword ptr [edi]", + "mov edx, dword ptr [edi + 4]", + // (atomic) swap (CAS loop) + "2:", + "lock cmpxchg8b qword ptr [edi]", + "jne 2b", + // store previous value to out + "mov dword ptr [esi], eax", + "mov dword ptr [esi + 4], edx", + "mov esi, {esi_tmp}", // restore esi + esi_tmp = inout(reg) out_ptr => _, + inout("eax") val => _, + out("edx") _, + out("ebx") _, + out("ecx") _, + in("edi") dst, + // Do not use `preserves_flags` because CMPXCHG8B modifies the ZF flag. 
+ options(nostack), + ); + } + out + } + } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + let old = old.as_ptr(); + let new = new.as_ptr(); + + // SAFETY: the caller must uphold the safety contract. + // + // Refs: https://www.felixcloutier.com/x86/cmpxchg + unsafe { + let mut r: u32; + // compare_exchange is always SeqCst. + asm!( + // esi is reserved by LLVM + "xchg {esi_tmp}, esi", + "mov eax, dword ptr [edx]", + "mov edx, dword ptr [edx + 4]", + "mov ebx, dword ptr [ecx]", + "mov ecx, dword ptr [ecx + 4]", + // (atomic) CAS + "lock cmpxchg8b qword ptr [edi]", + "sete cl", + // store previous value to out + "mov dword ptr [esi], eax", + "mov dword ptr [esi + 4], edx", + "mov esi, {esi_tmp}", // restore esi + esi_tmp = inout(reg) out_ptr => _, + out("eax") _, + inout("edx") old => _, + out("ebx") _, + inout("ecx") new => r, + in("edi") dst, + // Do not use `preserves_flags` because CMPXCHG8B modifies the ZF flag. + options(nostack), + ); + debug_assert!(r as u8 == 0 || r as u8 == 1, "r={}", r as u8); + (out, r as u8 != 0) + } + } + } + }; +} + +#[cfg(target_arch = "x86")] +#[cfg(not(atomic_maybe_uninit_no_cmpxchg8b))] +atomic64!(i64); +#[cfg(target_arch = "x86")] +#[cfg(not(atomic_maybe_uninit_no_cmpxchg8b))] +atomic64!(u64); + +#[cfg(target_arch = "x86_64")] +#[cfg(any(target_feature = "cmpxchg16b", atomic_maybe_uninit_target_feature = "cmpxchg16b"))] +macro_rules! atomic128 { + ($int_type:ident) => { + #[cfg(target_pointer_width = "32")] + atomic128!($int_type, "edi", "esi", "r8d", "edx"); + #[cfg(target_pointer_width = "64")] + atomic128!($int_type, "rdi", "rsi", "r8", "rdx"); + }; + ($int_type:ident, $rdi:tt, $rsi:tt, $r8:tt, $rdx:tt) => { + impl AtomicLoad for $int_type { + #[inline] + unsafe fn atomic_load( + src: *const MaybeUninit, + _order: Ordering, + ) -> MaybeUninit { + debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); + let mut out: MaybeUninit = MaybeUninit::uninit(); + let out_ptr = out.as_mut_ptr(); + + // SAFETY: the caller must guarantee that `src` is valid for both writes and + // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. + // cfg guarantees that the CPU supports CMPXCHG16B. + // + // If the value at `dst` (destination operand) and rdx:rax are equal, the + // 128-bit value in rcx:rbx is stored in the `dst`, otherwise the value at + // `dst` is loaded to rdx:rax. + // + // The ZF flag is set if the value at `dst` and rdx:rax are equal, + // otherwise it is cleared. Other flags are unaffected. + // + // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b + unsafe { + // atomic load is always SeqCst. 
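+ // CMPXCHG16B with both the expected and the desired value set to zero never changes the stored value but always leaves the current contents in rdx:rax, yielding an atomic 16-byte load.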
+                    asm!(
+                        // rbx is reserved by LLVM
+                        "mov {rbx_tmp}, rbx",
+                        "xor rbx, rbx", // zero rbx
+                        // (atomic) load by cmpxchg(0, 0)
+                        concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                        // store current value to out
+                        concat!("mov qword ptr [", $rsi, "], rax"),
+                        concat!("mov qword ptr [", $rsi, " + 8], rdx"),
+                        "mov rbx, {rbx_tmp}", // restore rbx
+                        // set old/new args of cmpxchg16b to 0 (rbx is zeroed after it is saved to rbx_tmp, to avoid xchg)
+                        rbx_tmp = out(reg) _,
+                        in("rcx") 0_u64,
+                        inout("rax") 0_u64 => _,
+                        inout("rdx") 0_u64 => _,
+                        in($rdi) src,
+                        in($rsi) out_ptr,
+                        // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                        options(nostack),
+                    );
+                }
+                out
+            }
+        }
+        impl AtomicStore for $int_type {
+            #[inline]
+            unsafe fn atomic_store(
+                dst: *mut MaybeUninit<Self>,
+                val: MaybeUninit<Self>,
+                _order: Ordering,
+            ) {
+                debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0);
+                let val = val.as_ptr();
+
+                // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+                // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+                // cfg guarantees that the CPU supports CMPXCHG16B.
+                //
+                // If the value at `dst` (destination operand) and rdx:rax are equal, the
+                // 128-bit value in rcx:rbx is stored in the `dst`, otherwise the value at
+                // `dst` is loaded to rdx:rax.
+                //
+                // The ZF flag is set if the value at `dst` and rdx:rax are equal,
+                // otherwise it is cleared. Other flags are unaffected.
+                //
+                // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
+                unsafe {
+                    // atomic store is always SeqCst.
+                    asm!(
+                        // rbx is reserved by LLVM
+                        "mov {rbx_tmp}, rbx",
+                        concat!("mov rbx, qword ptr [", $rsi, "]"),
+                        concat!("mov rcx, qword ptr [", $rsi, " + 8]"),
+                        // This is based on the code LLVM generates for the first load in DW RMWs
+                        // (interestingly, it performs a mixed-sized atomic access).
+                        //
+                        // This is not a single-copy atomic read, but that is fine because the
+                        // subsequent CAS will check for consistency.
+                        concat!("mov rax, qword ptr [", $rdi, "]"),
+                        concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
+                        // (atomic) store (CAS loop)
+                        "2:",
+                            concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                            "jne 2b",
+                        "mov rbx, {rbx_tmp}", // restore rbx
+                        rbx_tmp = out(reg) _,
+                        out("rax") _,
+                        out("rcx") _,
+                        out("rdx") _,
+                        in($rdi) dst,
+                        in($rsi) val,
+                        // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                        options(nostack),
+                    );
+                }
+            }
+        }
+        impl AtomicSwap for $int_type {
+            #[inline]
+            unsafe fn atomic_swap(
+                dst: *mut MaybeUninit<Self>,
+                val: MaybeUninit<Self>,
+                _order: Ordering,
+            ) -> MaybeUninit<Self> {
+                debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0);
+                let mut out: MaybeUninit<Self> = MaybeUninit::uninit();
+                let out_ptr = out.as_mut_ptr();
+                let val = val.as_ptr();
+
+                // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+                // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+                // cfg guarantees that the CPU supports CMPXCHG16B.
+                //
+                // If the value at `dst` (destination operand) and rdx:rax are equal, the
+                // 128-bit value in rcx:rbx is stored in the `dst`, otherwise the value at
+                // `dst` is loaded to rdx:rax.
+                //
+                // The ZF flag is set if the value at `dst` and rdx:rax are equal,
+                // otherwise it is cleared. Other flags are unaffected.
+                //
+                // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
+                unsafe {
+                    // atomic swap is always SeqCst.
+                    asm!(
+                        // rbx is reserved by LLVM
+                        "mov {rbx_tmp}, rbx",
+                        concat!("mov rbx, qword ptr [", $rsi, "]"),
+                        concat!("mov rcx, qword ptr [", $rsi, " + 8]"),
+                        // This is based on the code LLVM generates for the first load in DW RMWs
+                        // (interestingly, it performs a mixed-sized atomic access).
+                        //
+                        // This is not a single-copy atomic read, but that is fine because the
+                        // subsequent CAS will check for consistency.
+                        concat!("mov rax, qword ptr [", $rdi, "]"),
+                        concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
+                        // (atomic) swap (CAS loop)
+                        "2:",
+                            concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                            "jne 2b",
+                        // store previous value to out
+                        concat!("mov qword ptr [", $r8, "], rax"),
+                        concat!("mov qword ptr [", $r8, " + 8], rdx"),
+                        "mov rbx, {rbx_tmp}", // restore rbx
+                        rbx_tmp = out(reg) _,
+                        out("rax") _,
+                        out("rcx") _,
+                        out("rdx") _,
+                        in($rdi) dst,
+                        in($rsi) val,
+                        in($r8) out_ptr,
+                        // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                        options(nostack),
+                    );
+                }
+                out
+            }
+        }
+        impl AtomicCompareExchange for $int_type {
+            #[inline]
+            unsafe fn atomic_compare_exchange(
+                dst: *mut MaybeUninit<Self>,
+                old: MaybeUninit<Self>,
+                new: MaybeUninit<Self>,
+                _success: Ordering,
+                _failure: Ordering,
+            ) -> (MaybeUninit<Self>, bool) {
+                debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0);
+                let mut out: MaybeUninit<Self> = MaybeUninit::uninit();
+                let out_ptr = out.as_mut_ptr();
+                let old = old.as_ptr();
+                let new = new.as_ptr();
+
+                // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+                // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+                // cfg guarantees that the CPU supports CMPXCHG16B.
+                //
+                // If the value at `dst` (destination operand) and rdx:rax are equal, the
+                // 128-bit value in rcx:rbx is stored in the `dst`, otherwise the value at
+                // `dst` is loaded to rdx:rax.
+                //
+                // The ZF flag is set if the value at `dst` and rdx:rax are equal,
+                // otherwise it is cleared. Other flags are unaffected.
+                //
+                // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
+                unsafe {
+                    let mut r: u64;
+                    // compare_exchange is always SeqCst.
+                    asm!(
+                        // rbx is reserved by LLVM
+                        "mov {rbx_tmp}, rbx",
+                        concat!("mov rax, qword ptr [", $rsi, "]"),
+                        concat!("mov rsi, qword ptr [", $rsi, " + 8]"),
+                        concat!("mov rbx, qword ptr [", $rdx, "]"),
+                        concat!("mov rcx, qword ptr [", $rdx, " + 8]"),
+                        "mov rdx, rsi",
+                        // (atomic) CAS
+                        concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                        "sete cl",
+                        // store previous value to out
+                        concat!("mov qword ptr [", $r8, "], rax"),
+                        concat!("mov qword ptr [", $r8, " + 8], rdx"),
+                        "mov rbx, {rbx_tmp}", // restore rbx
+                        rbx_tmp = out(reg) _,
+                        out("rax") _,
+                        out("rcx") r,
+                        lateout("rdx") _,
+                        lateout("rsi") _,
+                        in($rdi) dst,
+                        in($rsi) old,
+                        in($rdx) new,
+                        in($r8) out_ptr,
+                        // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                        options(nostack),
+                    );
+                    debug_assert!(r as u8 == 0 || r as u8 == 1, "r={}", r as u8);
+                    (out, r as u8 != 0)
+                }
+            }
+        }
+    };
+}
+
+#[cfg(target_arch = "x86_64")]
+#[cfg(any(target_feature = "cmpxchg16b", atomic_maybe_uninit_target_feature = "cmpxchg16b"))]
+atomic128!(i128);
+#[cfg(target_arch = "x86_64")]
+#[cfg(any(target_feature = "cmpxchg16b", atomic_maybe_uninit_target_feature = "cmpxchg16b"))]
+atomic128!(u128);
+
+#[macro_export]
+macro_rules! cfg_has_atomic_8 {
+    ($($tt:tt)*) => { $($tt)* };
+}
+#[macro_export]
+macro_rules! cfg_no_atomic_8 {
+    ($($tt:tt)*) => {};
+}
+#[macro_export]
+macro_rules! cfg_has_atomic_16 {
+    ($($tt:tt)*) => { $($tt)* };
+}
+#[macro_export]
+macro_rules! cfg_no_atomic_16 {
+    ($($tt:tt)*) => {};
+}
+#[macro_export]
+macro_rules! cfg_has_atomic_32 {
+    ($($tt:tt)*) => { $($tt)* };
+}
+#[macro_export]
+macro_rules! cfg_no_atomic_32 {
+    ($($tt:tt)*) => {};
+}
+#[macro_export]
+macro_rules! cfg_has_atomic_64 {
+    ($($tt:tt)*) => { $($tt)* };
+}
+#[macro_export]
+macro_rules! cfg_no_atomic_64 {
+    ($($tt:tt)*) => {};
+}
+#[cfg(not(all(
+    target_arch = "x86_64",
+    any(target_feature = "cmpxchg16b", atomic_maybe_uninit_target_feature = "cmpxchg16b"),
+)))]
+#[macro_export]
+macro_rules! cfg_has_atomic_128 {
+    ($($tt:tt)*) => {};
+}
+#[cfg(not(all(
+    target_arch = "x86_64",
+    any(target_feature = "cmpxchg16b", atomic_maybe_uninit_target_feature = "cmpxchg16b"),
+)))]
+#[macro_export]
+macro_rules! cfg_no_atomic_128 {
+    ($($tt:tt)*) => { $($tt)* };
+}
+#[cfg(all(
+    target_arch = "x86_64",
+    any(target_feature = "cmpxchg16b", atomic_maybe_uninit_target_feature = "cmpxchg16b"),
+))]
+#[macro_export]
+macro_rules! cfg_has_atomic_128 {
+    ($($tt:tt)*) => { $($tt)* };
+}
+#[cfg(all(
+    target_arch = "x86_64",
+    any(target_feature = "cmpxchg16b", atomic_maybe_uninit_target_feature = "cmpxchg16b"),
+))]
+#[macro_export]
+macro_rules! cfg_no_atomic_128 {
+    ($($tt:tt)*) => {};
+}
+#[macro_export]
+macro_rules! cfg_has_atomic_cas {
+    ($($tt:tt)*) => { $($tt)* };
+}
+#[macro_export]
+macro_rules! cfg_no_atomic_cas {
+    ($($tt:tt)*) => {};
+}
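Aside (illustrative only, not part of the patch): the `lock cmpxchg8b`/`lock cmpxchg16b` sequences above implement plain stores as compare-exchange loops because those widths have no single-instruction atomic store on the respective targets. The same idea, sketched in safe Rust with `AtomicU64` standing in for the double-width value:

// Illustrative sketch of the CAS-loop store technique used by the asm above.
use core::sync::atomic::{AtomicU64, Ordering};

fn store_via_cas_loop(dst: &AtomicU64, val: u64) {
    // Read a guess of the current value; a stale guess only costs an extra
    // iteration, just like the relaxed first load in the asm above.
    let mut current = dst.load(Ordering::Relaxed);
    // Retry until the CAS succeeds, mirroring the `lock cmpxchg8b; jne 2b` loop.
    while let Err(actual) =
        dst.compare_exchange_weak(current, val, Ordering::SeqCst, Ordering::Relaxed)
    {
        current = actual;
    }
}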
diff --git a/src/lib.rs b/src/lib.rs
index 9e95a8c9..ac1152e5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -110,6 +110,7 @@ mod utils;
 #[macro_use]
 mod tests;
 
+#[cfg_attr(atomic_maybe_uninit_no_asm_maybe_uninit, path = "arch_legacy/mod.rs")]
 mod arch;
 
 pub mod raw;
@@ -257,14 +258,12 @@ impl<T: Primitive> AtomicMaybeUninit<T> {
         T: AtomicLoad,
     {
         utils::assert_load_ordering(order);
-        let mut out = MaybeUninit::<T>::uninit();
         // SAFETY: any data races are prevented by atomic intrinsics, the raw
         // pointer passed in is valid because we got it from a reference,
         // and we've checked the order is valid. Alignment is upheld because
         // `PrimitivePriv` is a private trait that ensures sufficient alignment
         // of `T::Align`, and we got our `_align` field.
-        unsafe { T::atomic_load(self.v.get(), &mut out, order) }
-        out
+        unsafe { T::atomic_load(self.v.get(), order) }
     }
 
     /// Stores a value into the atomic value.
@@ -299,7 +298,7 @@
         // and we've checked the order is valid. Alignment is upheld because
         // `PrimitivePriv` is a private trait that ensures sufficient alignment
         // of `T::Align`, and we got our `_align` field.
-        unsafe { T::atomic_store(self.v.get(), &val, order) }
+        unsafe { T::atomic_store(self.v.get(), val, order) }
     }
 
     /// Stores a value into the atomic value, returning the previous value.
@@ -327,11 +326,9 @@
     where
         T: AtomicSwap,
     {
-        let mut out = MaybeUninit::<T>::uninit();
         // SAFETY: any data races are prevented by atomic intrinsics and the raw
         // pointer passed in is valid because we got it from a reference.
-        unsafe { T::atomic_swap(self.v.get(), &val, &mut out, order) }
-        out
+        unsafe { T::atomic_swap(self.v.get(), val, order) }
     }
 
     /// Stores a value into the atomic value if the current value is the same as
@@ -499,16 +496,14 @@
         T: AtomicCompareExchange,
     {
         utils::assert_compare_exchange_ordering(success, failure);
-        let mut out = MaybeUninit::<T>::uninit();
-        // SAFETY: any data races are prevented by atomic intrinsics and the raw
-        // pointer passed in is valid because we got it from a reference.
-        // Alignment is upheld because `PrimitivePriv` is a private trait that
-        // ensures sufficient alignment of `T::Align`, and we got our `_align`
-        // field.
-        let res = unsafe {
-            T::atomic_compare_exchange(self.v.get(), &current, &new, &mut out, success, failure)
-        };
-        if res {
+        let (out, ok) =
+            // SAFETY: any data races are prevented by atomic intrinsics and the raw
+            // pointer passed in is valid because we got it from a reference.
+            // Alignment is upheld because `PrimitivePriv` is a private trait that
+            // ensures sufficient alignment of `T::Align`, and we got our `_align`
+            // field.
+            unsafe { T::atomic_compare_exchange(self.v.get(), current, new, success, failure) };
+        if ok {
             Ok(out)
         } else {
             Err(out)
@@ -581,23 +576,15 @@
         T: AtomicCompareExchange,
     {
         utils::assert_compare_exchange_ordering(success, failure);
-        let mut out = MaybeUninit::<T>::uninit();
         // SAFETY: any data races are prevented by atomic intrinsics and the raw
         // pointer passed in is valid because we got it from a reference.
         // Alignment is upheld because `PrimitivePriv` is a private trait that
         // ensures sufficient alignment of `T::Align`, and we got our `_align`
         // field.
-        let res = unsafe {
-            T::atomic_compare_exchange_weak(
-                self.v.get(),
-                &current,
-                &new,
-                &mut out,
-                success,
-                failure,
-            )
+        let (out, ok) = unsafe {
+            T::atomic_compare_exchange_weak(self.v.get(), current, new, success, failure)
         };
-        if res {
+        if ok {
             Ok(out)
         } else {
             Err(out)
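Aside (illustrative only, not part of the patch; assumes the crate's existing `AtomicMaybeUninit<T>` methods): after the `src/lib.rs` changes above, values flow through the public API as `MaybeUninit<T>` by value, with no caller-provided out pointers:

// Hypothetical usage sketch of the by-value API.
use core::mem::MaybeUninit;
use core::sync::atomic::Ordering;
use atomic_maybe_uninit::AtomicMaybeUninit;

fn example(a: &AtomicMaybeUninit<u32>) {
    // Store and swap take `MaybeUninit<u32>` by value; load and swap return it by value.
    a.store(MaybeUninit::new(1), Ordering::Release);
    let prev = a.swap(MaybeUninit::new(2), Ordering::AcqRel);
    let loaded = a.load(Ordering::Acquire);
    let _ = (prev, loaded);
}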
diff --git a/src/raw.rs b/src/raw.rs
index 0abb36e4..39d7239c 100644
--- a/src/raw.rs
+++ b/src/raw.rs
@@ -35,10 +35,6 @@ pub trait AtomicLoad: Primitive {
     /// Otherwise, `src` must be valid for reads.
     /// - `src` must be properly aligned **to the size of `Self`**.
     ///   (For example, if `Self` is `u128`, `src` must be aligned to 16-byte even if the alignment of `u128` is 8-byte.)
-    /// - `src` must go through [`UnsafeCell::get`].
-    /// - `src` must *not* overlap with `out`.
-    /// - `out` must be valid for writes.
-    /// - `out` must be properly aligned.
     /// - `order` must be [`SeqCst`], [`Acquire`], or [`Relaxed`].
     ///
     /// The rules for the validity of pointer follow [the rules applied to
@@ -46,11 +42,7 @@ pub trait AtomicLoad: Primitive {
     /// except that concurrent atomic operations on `src` are allowed.
     ///
     /// [validity]: core::ptr#safety
-    unsafe fn atomic_load(
-        src: *const MaybeUninit<Self>,
-        out: *mut MaybeUninit<Self>,
-        order: Ordering,
-    );
+    unsafe fn atomic_load(src: *const MaybeUninit<Self>, order: Ordering) -> MaybeUninit<Self>;
 }
 
 /// Atomic store.
@@ -70,10 +62,6 @@ pub trait AtomicStore: Primitive {
     /// Otherwise, `dst` must be valid for writes.
     /// - `dst` must be properly aligned **to the size of `Self`**.
     ///   (For example, if `Self` is `u128`, `dst` must be aligned to 16-byte even if the alignment of `u128` is 8-byte.)
-    /// - `dst` must go through [`UnsafeCell::get`].
-    /// - `dst` must *not* overlap with `val`.
-    /// - `val` must be valid for reads.
-    /// - `val` must be properly aligned.
    /// - `order` must be [`SeqCst`], [`Release`], or [`Relaxed`].
     ///
     /// The rules for the validity of pointer follow [the rules applied to
@@ -81,11 +69,7 @@ pub trait AtomicStore: Primitive {
     /// except that concurrent atomic operations on `dst` are allowed.
     ///
     /// [validity]: core::ptr#safety
-    unsafe fn atomic_store(
-        dst: *mut MaybeUninit<Self>,
-        val: *const MaybeUninit<Self>,
-        order: Ordering,
-    );
+    unsafe fn atomic_store(dst: *mut MaybeUninit<Self>, val: MaybeUninit<Self>, order: Ordering);
 }
 
 /// Atomic swap.
@@ -106,12 +90,6 @@ pub trait AtomicSwap: AtomicLoad + AtomicStore {
     /// - `dst` must be valid for both reads and writes.
     /// - `dst` must be properly aligned **to the size of `Self`**.
     ///   (For example, if `Self` is `u128`, `dst` must be aligned to 16-byte even if the alignment of `u128` is 8-byte.)
-    /// - `dst` must go through [`UnsafeCell::get`].
-    /// - `dst` must *not* overlap with `val` or `out`.
-    /// - `val` must be valid for reads.
-    /// - `val` must be properly aligned.
-    /// - `out` must be valid for writes.
-    /// - `out` must be properly aligned.
     /// - `order` must be [`SeqCst`], [`AcqRel`], [`Acquire`], [`Release`], or [`Relaxed`].
     ///
     /// The rules for the validity of pointer follow [the rules applied to
@@ -121,10 +99,9 @@ pub trait AtomicSwap: AtomicLoad + AtomicStore {
     /// [validity]: core::ptr#safety
     unsafe fn atomic_swap(
         dst: *mut MaybeUninit<Self>,
-        val: *const MaybeUninit<Self>,
-        out: *mut MaybeUninit<Self>,
+        val: MaybeUninit<Self>,
         order: Ordering,
-    );
+    ) -> MaybeUninit<Self>;
 }
 
 /// Atomic compare and exchange.
@@ -153,14 +130,6 @@ pub trait AtomicCompareExchange: AtomicLoad + AtomicStore {
     /// - `dst` must be valid for both reads and writes.
     /// - `dst` must be properly aligned **to the size of `Self`**.
     ///   (For example, if `Self` is `u128`, `dst` must be aligned to 16-byte even if the alignment of `u128` is 8-byte.)
-    /// - `dst` must go through [`UnsafeCell::get`].
-    /// - `dst` must *not* overlap with `current`, `new`, or `out`.
-    /// - `current` must be valid for reads.
-    /// - `current` must be properly aligned.
-    /// - `new` must be valid for reads.
-    /// - `new` must be properly aligned.
-    /// - `out` must be valid for writes.
-    /// - `out` must be properly aligned.
     /// - `success` must be [`SeqCst`], [`AcqRel`], [`Acquire`], [`Release`], or [`Relaxed`].
     /// - `failure` must be [`SeqCst`], [`Acquire`], or [`Relaxed`].
     ///
@@ -179,12 +148,11 @@
     /// See [`AtomicMaybeUninit::compare_exchange`](crate::AtomicMaybeUninit::compare_exchange) for details.
     unsafe fn atomic_compare_exchange(
         dst: *mut MaybeUninit<Self>,
-        current: *const MaybeUninit<Self>,
-        new: *const MaybeUninit<Self>,
-        out: *mut MaybeUninit<Self>,
+        current: MaybeUninit<Self>,
+        new: MaybeUninit<Self>,
         success: Ordering,
         failure: Ordering,
-    ) -> bool;
+    ) -> (MaybeUninit<Self>, bool);
 
     /// Stores a value from `new` into `dst` if the current value is the same as
     /// the value at `current`, writes the previous value to `out`.
@@ -209,14 +177,6 @@
     /// - `dst` must be valid for both reads and writes.
     /// - `dst` must be properly aligned **to the size of `Self`**.
     ///   (For example, if `Self` is `u128`, `dst` must be aligned to 16-byte even if the alignment of `u128` is 8-byte.)
-    /// - `dst` must go through [`UnsafeCell::get`].
-    /// - `dst` must *not* overlap with `current`, `new`, or `out`.
-    /// - `current` must be valid for reads.
-    /// - `current` must be properly aligned.
-    /// - `new` must be valid for reads.
-    /// - `new` must be properly aligned.
-    /// - `out` must be valid for writes.
-    /// - `out` must be properly aligned.
     /// - `success` must be [`SeqCst`], [`AcqRel`], [`Acquire`], [`Release`], or [`Relaxed`].
     /// - `failure` must be [`SeqCst`], [`Acquire`], or [`Relaxed`].
     ///
@@ -236,13 +196,12 @@ pub trait AtomicCompareExchange: AtomicLoad + AtomicStore {
     #[inline]
     unsafe fn atomic_compare_exchange_weak(
         dst: *mut MaybeUninit<Self>,
-        current: *const MaybeUninit<Self>,
-        new: *const MaybeUninit<Self>,
-        out: *mut MaybeUninit<Self>,
+        current: MaybeUninit<Self>,
+        new: MaybeUninit<Self>,
         success: Ordering,
         failure: Ordering,
-    ) -> bool {
+    ) -> (MaybeUninit<Self>, bool) {
         // SAFETY: the caller must uphold the safety contract.
-        unsafe { Self::atomic_compare_exchange(dst, current, new, out, success, failure) }
+        unsafe { Self::atomic_compare_exchange(dst, current, new, success, failure) }
     }
 }
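Aside (illustrative only, not part of the patch): with the revised `raw` trait signatures above, a caller receives the previous value and the success flag directly instead of supplying an `out` pointer. A minimal sketch of a wrapper over `atomic_compare_exchange`, mirroring what `src/lib.rs` does:

// Hedged sketch; `cas` is a hypothetical helper, not an API of the crate.
use core::mem::MaybeUninit;
use core::sync::atomic::Ordering;
use atomic_maybe_uninit::raw::AtomicCompareExchange;

unsafe fn cas<T: AtomicCompareExchange>(
    dst: *mut MaybeUninit<T>,
    current: MaybeUninit<T>,
    new: MaybeUninit<T>,
) -> Result<MaybeUninit<T>, MaybeUninit<T>> {
    // SAFETY: the caller must uphold `atomic_compare_exchange`'s contract
    // (valid, properly aligned `dst`, no concurrent non-atomic accesses).
    let (out, ok) = unsafe {
        T::atomic_compare_exchange(dst, current, new, Ordering::SeqCst, Ordering::SeqCst)
    };
    if ok { Ok(out) } else { Err(out) }
}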
diff --git a/src/utils.rs b/src/utils.rs
index 07cc5892..2404f6de 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -4,7 +4,10 @@
 #[path = "gen/utils.rs"]
 mod gen;
 
-use core::sync::atomic::Ordering;
+use core::{
+    mem::{self, MaybeUninit},
+    sync::atomic::Ordering,
+};
 
 macro_rules! static_assert {
     ($cond:expr $(, $($msg:tt)*)?) => {
@@ -88,3 +91,161 @@ pub(crate) fn upgrade_success_ordering(success: Ordering, failure: Ordering) ->
         _ => success,
     }
 }
+
+#[allow(dead_code)]
+#[inline]
+pub(crate) fn zero_extend<T: ZeroExtend>(v: MaybeUninit<T>) -> T::Out {
+    T::zero_extend(v)
+}
+pub(crate) trait ZeroExtend: Copy {
+    type Out: Copy;
+    fn zero_extend(v: MaybeUninit<Self>) -> Self::Out;
+}
+#[derive(Clone, Copy)]
+#[repr(C)]
+struct ZeroExtend8 {
+    #[cfg(target_endian = "big")]
+    pad: [u8; 3],
+    v: MaybeUninit<u8>,
+    #[cfg(target_endian = "little")]
+    pad: [u8; 3],
+}
+#[derive(Clone, Copy)]
+#[repr(C)]
+struct ZeroExtend16 {
+    #[cfg(target_endian = "big")]
+    pad: u16,
+    v: MaybeUninit<u16>,
+    #[cfg(target_endian = "little")]
+    pad: u16,
+}
+macro_rules! zero_extend {
+    (8; $ty:ident) => {
+        impl ZeroExtend for $ty {
+            type Out = MaybeUninit<u32>;
+            #[inline]
+            fn zero_extend(v: MaybeUninit<$ty>) -> Self::Out {
+                #[allow(clippy::useless_transmute)]
+                // SAFETY: we can safely transmute any 32-bit value to MaybeUninit<u32>.
+                unsafe {
+                    mem::transmute(ZeroExtend8 { v: mem::transmute(v), pad: [0; 3] })
+                }
+            }
+        }
+    };
+    (16; $ty:ident) => {
+        impl ZeroExtend for $ty {
+            type Out = MaybeUninit<u32>;
+            #[inline]
+            fn zero_extend(v: MaybeUninit<$ty>) -> Self::Out {
+                #[allow(clippy::useless_transmute)]
+                // SAFETY: we can safely transmute any 32-bit value to MaybeUninit<u32>.
+                unsafe {
+                    mem::transmute(ZeroExtend16 { v: mem::transmute(v), pad: 0 })
+                }
+            }
+        }
+    };
+    ($ty:ident) => {
+        impl ZeroExtend for $ty {
+            type Out = MaybeUninit<$ty>;
+            #[inline]
+            fn zero_extend(v: MaybeUninit<$ty>) -> Self::Out {
+                v
+            }
+        }
+    };
+}
+zero_extend!(8; i8);
+zero_extend!(8; u8);
+zero_extend!(16; i16);
+zero_extend!(16; u16);
+zero_extend!(i32);
+zero_extend!(u32);
+zero_extend!(i64);
+zero_extend!(u64);
+zero_extend!(isize);
+zero_extend!(usize);
+
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "powerpc64",
+    target_arch = "s390x",
+    target_arch = "x86_64",
+))]
+pub(crate) use imp::Pair128 as Pair;
+#[allow(dead_code)]
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "powerpc64",
+    target_arch = "s390x",
+    target_arch = "x86_64",
+))]
+/// A 128-bit value represented as a pair of 64-bit values.
+///
+/// This type is `#[repr(C)]`; all fields have the same in-memory representation
+/// and are plain old data types, so access to the fields is always safe.
+#[derive(Clone, Copy)]
+#[repr(C)]
+pub(crate) union MaybeUninit128 {
+    pub(crate) u128: MaybeUninit<u128>,
+    pub(crate) i128: MaybeUninit<i128>,
+    pub(crate) pair: Pair,
+}
+
+#[cfg(any(target_arch = "arm", target_arch = "hexagon", target_arch = "x86"))]
+pub(crate) use imp::Pair64 as Pair;
+#[allow(dead_code)]
+#[cfg(any(target_arch = "arm", target_arch = "hexagon", target_arch = "x86"))]
+/// A 64-bit value represented as a pair of 32-bit values.
+///
+/// This type is `#[repr(C)]`; all fields have the same in-memory representation
+/// and are plain old data types, so access to the fields is always safe.
+#[derive(Clone, Copy)]
+#[repr(C)]
+pub(crate) union MaybeUninit64 {
+    pub(crate) u64: MaybeUninit<u64>,
+    pub(crate) i64: MaybeUninit<i64>,
+    pub(crate) pair: Pair,
+}
+
+// little endian order
+#[allow(dead_code)]
+#[cfg(any(target_endian = "little", target_arch = "aarch64", target_arch = "arm"))]
+mod imp {
+    use core::mem::MaybeUninit;
+    // A pair of 32-bit values.
+    #[derive(Clone, Copy)]
+    #[repr(C)]
+    pub(crate) struct Pair64 {
+        pub(crate) lo: MaybeUninit<u32>,
+        pub(crate) hi: MaybeUninit<u32>,
+    }
+    // A pair of 64-bit values.
+    #[derive(Clone, Copy)]
+    #[repr(C)]
+    pub(crate) struct Pair128 {
+        pub(crate) lo: MaybeUninit<u64>,
+        pub(crate) hi: MaybeUninit<u64>,
+    }
+}
+// big endian order
+#[allow(dead_code)]
+#[cfg(not(any(target_endian = "little", target_arch = "aarch64", target_arch = "arm")))]
+mod imp {
+    use core::mem::MaybeUninit;
+    // A pair of 32-bit values.
+    #[derive(Clone, Copy)]
+    #[repr(C)]
+    pub(crate) struct Pair64 {
+        pub(crate) hi: MaybeUninit<u32>,
+        pub(crate) lo: MaybeUninit<u32>,
+    }
+    // A pair of 64-bit values.
+    #[derive(Clone, Copy)]
+    #[repr(C)]
+    pub(crate) struct Pair128 {
+        pub(crate) hi: MaybeUninit<u64>,
+        pub(crate) lo: MaybeUninit<u64>,
+    }
+}
diff --git a/tools/build.sh b/tools/build.sh
index 10110690..fbd1822d 100755
--- a/tools/build.sh
+++ b/tools/build.sh
@@ -53,7 +53,7 @@ default_targets=(
     armv7-unknown-linux-gnueabihf
     thumbv7neon-unknown-linux-gnueabihf
     # armv7-a big endian
-    armebv7-unknown-linux-gnueabi
+    armebv7-unknown-linux-gnueabi # custom target
     # armv8-a
     armv8a-none-eabi # custom target
     # armv8-a big endian
@@ -128,7 +129,9 @@ default_targets=(
    # rustc --print target-list | grep -E '^hexagon'
    hexagon-unknown-linux-musl
 )
-known_cfgs=()
+known_cfgs=(
+    atomic_maybe_uninit_use_cp15_barrier
+)

 x() {
  local cmd="$1"
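Aside (illustrative only, not part of the patch): the `MaybeUninit128`/`Pair` helpers added in `src/utils.rs` above let the asm view a possibly-uninitialized 128-bit value as two 64-bit register-sized halves. A standalone re-creation of the pattern; the little-endian field order shown here is swapped for big-endian targets in the real code:

// Standalone sketch of the union-based splitting pattern.
use core::mem::MaybeUninit;

#[derive(Clone, Copy)]
#[repr(C)]
struct Pair128 {
    lo: MaybeUninit<u64>,
    hi: MaybeUninit<u64>,
}

#[derive(Clone, Copy)]
#[repr(C)]
union MaybeUninit128 {
    u128: MaybeUninit<u128>,
    pair: Pair128,
}

fn split(v: MaybeUninit<u128>) -> (MaybeUninit<u64>, MaybeUninit<u64>) {
    let u = MaybeUninit128 { u128: v };
    // SAFETY: both union fields are plain-old-data `MaybeUninit` views of the
    // same 16 bytes with `#[repr(C)]` layout, so reading either view is sound
    // even when the value is uninitialized.
    unsafe { (u.pair.lo, u.pair.hi) }
}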