diff --git a/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64 b/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64 index 0ab9f379a2..f17fa38e63 100644 --- a/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64 +++ b/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64 @@ -15,9 +15,7 @@ bench_new_box_zeroed_with_elems_dynamic_padding: and r14d, 3 add r14, rax setb al - movabs rcx, 9223372036854775803 - cmp r14, rcx - seta cl + sets cl or cl, al je .LBB5_4 .LBB5_6: diff --git a/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64.mca b/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64.mca index f666a03ce9..a3dded82de 100644 --- a/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64.mca +++ b/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 3200 -Total Cycles: 2989 -Total uOps: 4300 +Instructions: 3000 +Total Cycles: 2990 +Total uOps: 4000 Dispatch Width: 4 -uOps Per Cycle: 1.44 -IPC: 1.07 -Block RThroughput: 10.8 +uOps Per Cycle: 1.34 +IPC: 1.00 +Block RThroughput: 10.0 Instruction Info: @@ -34,9 +34,7 @@ Instruction Info: 1 1 0.33 and r14d, 3 1 1 0.33 add r14, rax 1 1 0.50 setb al - 1 1 0.33 movabs rcx, 9223372036854775803 - 1 1 0.33 cmp r14, rcx - 2 2 1.00 seta cl + 1 1 0.50 sets cl 1 1 0.33 or cl, al 1 1 1.00 je .LBB5_4 1 0 0.25 xor eax, eax @@ -65,16 +63,16 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 8.99 8.98 5.00 10.03 4.49 4.51 + - - 7.04 7.99 5.00 9.97 4.50 4.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - 1.00 - 0.49 0.51 push r14 - - - - - 1.00 - 0.51 0.49 push rbx - - - - - 1.00 - 0.49 0.51 push rax - - - 0.95 0.04 - 0.01 - - mov rbx, rdi - - - - 0.97 - 0.03 - - mov ecx, 3 - - - 0.02 0.02 - 0.96 - - mov rax, rdi + - - - - 1.00 - 0.50 0.50 push r14 + - - - - 1.00 - 0.50 0.50 push rbx + - - - - 1.00 - 0.50 0.50 push rax + - - 0.96 0.02 - 0.02 - - mov rbx, rdi + - - 0.01 
0.98 - 0.01 - - mov ecx, 3 + - - 0.04 0.01 - 0.95 - - mov rax, rdi - - 1.00 1.00 - - - - mul rcx - - - - - 1.00 - - jo .LBB5_6 - - 0.02 0.97 - 0.01 - - mov r14, rax @@ -82,22 +80,20 @@ Resource pressure by instruction: - - - - - 1.00 - - ja .LBB5_6 - - 0.99 0.01 - - - - lea rax, [r14 + 9] - - 0.01 0.99 - - - - not r14d - - - 0.97 0.03 - - - - and r14d, 3 - - - 0.01 0.98 - 0.01 - - add r14, rax + - - 0.04 0.96 - - - - and r14d, 3 + - - 0.01 0.03 - 0.96 - - add r14, rax - - 1.00 - - - - - setb al - - - 0.02 - - 0.98 - - movabs rcx, 9223372036854775803 - - - - 0.97 - 0.03 - - cmp r14, rcx - - - 2.00 - - - - - seta cl - - - 0.03 0.03 - 0.94 - - or cl, al + - - - - - 1.00 - - sets cl + - - 0.97 0.03 - - - - or cl, al - - - - - 1.00 - - je .LBB5_4 - - - - - - - - xor eax, eax - - - - - 1.00 - - jmp .LBB5_5 - - - - - 1.00 1.00 1.02 0.98 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] - - - 0.03 0.97 - - - - mov esi, 4 - - - 0.96 0.01 - 0.03 - - mov rdi, r14 - - - - - 1.00 1.00 0.98 1.02 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] - - - - 0.97 - 0.03 - - mov rdx, rbx - - - 0.01 0.99 - - - - add rsp, 8 + - - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] + - - 0.97 0.03 - - - - mov esi, 4 + - - - 1.00 - - - - mov rdi, r14 + - - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] + - - 0.03 0.95 - 0.02 - - mov rdx, rbx + - - 0.02 0.98 - - - - add rsp, 8 - - - - - - 0.50 0.50 pop rbx - - - - - - 0.50 0.50 pop r14 - - - - - 1.00 - - ret diff --git a/benches/new_box_zeroed_with_elems_dynamic_size.x86-64 b/benches/new_box_zeroed_with_elems_dynamic_size.x86-64 index 175fff0fd3..8bd2565912 100644 --- a/benches/new_box_zeroed_with_elems_dynamic_size.x86-64 +++ b/benches/new_box_zeroed_with_elems_dynamic_size.x86-64 @@ -3,18 +3,20 @@ bench_new_box_zeroed_with_elems_dynamic_size: push rbx push rax mov rbx, rdi - movabs rax, 4611686018427387900 + movabs 
rax, 9223372036854775805 cmp rdi, rax - jbe .LBB5_2 - xor eax, eax - jmp .LBB5_3 -.LBB5_2: - lea r14, [2*rbx + 4] + ja .LBB5_3 + lea r14, [rbx + rbx] + add r14, 4 + js .LBB5_3 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] mov esi, 2 mov rdi, r14 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] + jmp .LBB5_4 .LBB5_3: + xor eax, eax +.LBB5_4: mov rdx, rbx add rsp, 8 pop rbx diff --git a/benches/new_box_zeroed_with_elems_dynamic_size.x86-64.mca b/benches/new_box_zeroed_with_elems_dynamic_size.x86-64.mca index 88b5f84b98..cdf029614d 100644 --- a/benches/new_box_zeroed_with_elems_dynamic_size.x86-64.mca +++ b/benches/new_box_zeroed_with_elems_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1900 +Instructions: 2100 Total Cycles: 2990 -Total uOps: 2800 +Total uOps: 3000 Dispatch Width: 4 -uOps Per Cycle: 0.94 -IPC: 0.64 -Block RThroughput: 7.0 +uOps Per Cycle: 1.00 +IPC: 0.70 +Block RThroughput: 7.5 Instruction Info: @@ -22,16 +22,18 @@ Instruction Info: 2 5 1.00 * push rbx 2 5 1.00 * push rax 1 1 0.33 mov rbx, rdi - 1 1 0.33 movabs rax, 4611686018427387900 + 1 1 0.33 movabs rax, 9223372036854775805 1 1 0.33 cmp rdi, rax - 1 1 1.00 jbe .LBB5_2 - 1 0 0.25 xor eax, eax - 1 1 1.00 jmp .LBB5_3 - 1 1 0.50 lea r14, [2*rbx + 4] + 1 1 1.00 ja .LBB5_3 + 1 1 0.50 lea r14, [rbx + rbx] + 1 1 0.33 add r14, 4 + 1 1 1.00 js .LBB5_3 4 7 1.00 * call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] 1 1 0.33 mov esi, 2 1 1 0.33 mov rdi, r14 4 7 1.00 * call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] + 1 1 1.00 jmp .LBB5_4 + 1 0 0.25 xor eax, eax 1 1 0.33 mov rdx, rbx 1 1 0.33 add rsp, 8 1 6 0.50 * pop rbx @@ -52,26 +54,28 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 3.97 3.97 5.00 5.06 4.50 4.50 + - - 4.49 4.50 5.00 6.01 4.50 4.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - 1.00 - 0.50 0.50 push r14 
- - - - 1.00 - 0.50 0.50 push rbx - - - - 1.00 - 0.50 0.50 push rax - - - 0.94 0.05 - 0.01 - - mov rbx, rdi - - - 0.05 0.95 - - - - movabs rax, 4611686018427387900 - - - 0.95 - - 0.05 - - cmp rdi, rax - - - - - - 1.00 - - jbe .LBB5_2 - - - - - - - - - xor eax, eax - - - - - - 1.00 - - jmp .LBB5_3 - - - - 1.00 - - - - lea r14, [2*rbx + 4] + - - 0.49 0.50 - 0.01 - - mov rbx, rdi + - - 0.50 0.50 - - - - movabs rax, 9223372036854775805 + - - 0.50 0.50 - - - - cmp rdi, rax + - - - - - 1.00 - - ja .LBB5_3 + - - 0.50 0.50 - - - - lea r14, [rbx + rbx] + - - 0.50 0.50 - - - - add r14, 4 + - - - - - 1.00 - - js .LBB5_3 - - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] - - - 0.06 0.94 - - - - mov esi, 2 - - - 0.94 0.06 - - - - mov rdi, r14 + - - 0.50 0.50 - - - - mov esi, 2 + - - 0.50 0.50 - - - - mov rdi, r14 - - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] - - - 0.05 0.95 - - - - mov rdx, rbx - - - 0.98 0.02 - - - - add rsp, 8 + - - - - - 1.00 - - jmp .LBB5_4 + - - - - - - - - xor eax, eax + - - 0.51 0.49 - - - - mov rdx, rbx + - - 0.49 0.51 - - - - add rsp, 8 - - - - - - 0.50 0.50 pop rbx - - - - - - 0.50 0.50 pop r14 - - - - - 1.00 - - ret diff --git a/benches/new_vec_zeroed.x86-64 b/benches/new_vec_zeroed.x86-64 index 264fa4a852..cf889c0503 100644 --- a/benches/new_vec_zeroed.x86-64 +++ b/benches/new_vec_zeroed.x86-64 @@ -1,34 +1,29 @@ bench_new_vec_zeroed: push r15 push r14 - push r12 push rbx - push rax mov rbx, rdi - movabs r12, 9223372036854775805 mov ecx, 6 mov rax, rsi mul rcx jo .LBB5_6 - cmp rax, r12 - jbe .LBB5_2 -.LBB5_6: - add r12, 3 - mov qword ptr [rbx], r12 - jmp .LBB5_7 -.LBB5_2: mov r14, rsi - test rax, rax - je .LBB5_3 mov r15, rax + test rax, rax + je .LBB5_2 + test r15, r15 + js .LBB5_6 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] mov esi, 2 mov rdi, r15 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] test rax, rax 
jne .LBB5_5 - jmp .LBB5_6 -.LBB5_3: +.LBB5_6: + movabs rax, -9223372036854775808 + mov qword ptr [rbx], rax + jmp .LBB5_7 +.LBB5_2: mov eax, 2 .LBB5_5: mov qword ptr [rbx], r14 @@ -36,9 +31,7 @@ bench_new_vec_zeroed: mov qword ptr [rbx + 16], r14 .LBB5_7: mov rax, rbx - add rsp, 8 pop rbx - pop r12 pop r14 pop r15 ret diff --git a/benches/new_vec_zeroed.x86-64.mca b/benches/new_vec_zeroed.x86-64.mca index 093bbde096..1ea5dddf80 100644 --- a/benches/new_vec_zeroed.x86-64.mca +++ b/benches/new_vec_zeroed.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 3800 -Total Cycles: 5277 -Total uOps: 5000 +Instructions: 3200 +Total Cycles: 3386 +Total uOps: 4200 Dispatch Width: 4 -uOps Per Cycle: 0.95 -IPC: 0.72 -Block RThroughput: 12.5 +uOps Per Cycle: 1.24 +IPC: 0.95 +Block RThroughput: 10.5 Instruction Info: @@ -20,39 +20,33 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 2 5 1.00 * push r15 2 5 1.00 * push r14 - 2 5 1.00 * push r12 2 5 1.00 * push rbx - 2 5 1.00 * push rax 1 1 0.33 mov rbx, rdi - 1 1 0.33 movabs r12, 9223372036854775805 1 1 0.33 mov ecx, 6 1 1 0.33 mov rax, rsi 2 4 1.00 mul rcx 1 1 1.00 jo .LBB5_6 - 1 1 0.33 cmp rax, r12 - 1 1 1.00 jbe .LBB5_2 - 1 1 0.33 add r12, 3 - 1 1 1.00 * mov qword ptr [rbx], r12 - 1 1 1.00 jmp .LBB5_7 1 1 0.33 mov r14, rsi - 1 1 0.33 test rax, rax - 1 1 1.00 je .LBB5_3 1 1 0.33 mov r15, rax + 1 1 0.33 test rax, rax + 1 1 1.00 je .LBB5_2 + 1 1 0.33 test r15, r15 + 1 1 1.00 js .LBB5_6 4 7 1.00 * call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] 1 1 0.33 mov esi, 2 1 1 0.33 mov rdi, r15 4 7 1.00 * call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] 1 1 0.33 test rax, rax 1 1 1.00 jne .LBB5_5 - 1 1 1.00 jmp .LBB5_6 + 1 1 0.33 movabs rax, -9223372036854775808 + 1 1 1.00 * mov qword ptr [rbx], rax + 1 1 1.00 jmp .LBB5_7 1 1 0.33 mov eax, 2 1 1 1.00 * mov qword ptr [rbx], r14 1 1 1.00 * mov qword ptr [rbx + 8], rax 1 1 1.00 * mov qword ptr [rbx + 16], r14 1 1 0.33 mov rax, rbx - 1 1 
0.33 add rsp, 8 1 6 0.50 * pop rbx - 1 6 0.50 * pop r12 1 6 0.50 * pop r14 1 6 0.50 * pop r15 1 1 1.00 U ret @@ -71,45 +65,39 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 7.01 7.98 11.00 11.01 8.50 8.50 + - - 6.01 6.03 9.00 10.96 7.00 7.00 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - 1.00 - 0.50 0.50 push r15 - - - - 1.00 - 0.50 0.50 push r14 - - - - - 1.00 - 0.50 0.50 push r12 - - - - 1.00 - 0.50 0.50 push rbx - - - - - 1.00 - 0.50 0.50 push rax - - - 0.98 0.01 - 0.01 - - mov rbx, rdi - - - 0.01 0.99 - - - - movabs r12, 9223372036854775805 - - - 0.02 - - 0.98 - - mov ecx, 6 - - - - 0.98 - 0.02 - - mov rax, rsi + - - 0.03 0.96 - 0.01 - - mov rbx, rdi + - - 0.96 0.04 - - - - mov ecx, 6 + - - 0.02 - - 0.98 - - mov rax, rsi - - 1.00 1.00 - - - - mul rcx - - - - - 1.00 - - jo .LBB5_6 - - - 0.99 0.01 - - - - cmp rax, r12 - - - - - - 1.00 - - jbe .LBB5_2 - - - - - - 1.00 - - add r12, 3 - - - - - 1.00 - 0.50 0.50 mov qword ptr [rbx], r12 - - - - - - 1.00 - - jmp .LBB5_7 - - - 0.98 0.02 - - - - mov r14, rsi - - - 0.01 0.99 - - - - test rax, rax - - - - - - 1.00 - - je .LBB5_3 - - - 0.99 0.01 - - - - mov r15, rax + - - - - - 1.00 - - mov r14, rsi + - - - 1.00 - - - - mov r15, rax + - - 1.00 - - - - - test rax, rax + - - - - - 1.00 - - je .LBB5_2 + - - - 1.00 - - - - test r15, r15 + - - - - - 1.00 - - js .LBB5_6 - - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] - - - 0.01 0.99 - - - - mov esi, 2 - - - 0.99 0.01 - - - - mov rdi, r15 + - - 0.99 - - 0.01 - - mov esi, 2 + - - 0.01 0.99 - - - - mov rdi, r15 - - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] - - - 0.01 0.99 - - - - test rax, rax + - - 0.99 0.01 - - - - test rax, rax - - - - - 1.00 - - jne .LBB5_5 - - - - - - 1.00 - - jmp .LBB5_6 - - - 0.02 0.98 - - - - mov eax, 2 + - - 0.01 0.03 - 0.96 - - movabs rax, -9223372036854775808 + - - - - 
1.00 - 0.50 0.50 mov qword ptr [rbx], rax + - - - - - 1.00 - - jmp .LBB5_7 + - - 0.03 0.97 - - - - mov eax, 2 - - - - 1.00 - 0.50 0.50 mov qword ptr [rbx], r14 - - - - 1.00 - 0.50 0.50 mov qword ptr [rbx + 8], rax - - - - 1.00 - 0.50 0.50 mov qword ptr [rbx + 16], r14 - - 0.97 0.03 - - - - mov rax, rbx - - - 0.03 0.97 - - - - add rsp, 8 - - - - - - 0.50 0.50 pop rbx - - - - - - - 0.50 0.50 pop r12 - - - - - - 0.50 0.50 pop r14 - - - - - - 0.50 0.50 pop r15 - - - - - 1.00 - - ret diff --git a/src/util/mod.rs b/src/util/mod.rs index ccc5166fdd..5edb5fe0e7 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -382,29 +382,24 @@ pub(crate) unsafe fn new_box( where T: ?Sized + crate::KnownLayout, { + let align = T::LAYOUT.align.get(); let size = match T::size_for_metadata(meta) { Some(size) => size, None => return Err(AllocError), }; - - let align = T::LAYOUT.align.get(); - // On stable Rust versions <= 1.64.0, `Layout::from_size_align` has a bug in - // which sufficiently-large allocations (those which, when rounded up to the - // alignment, overflow `isize`) are not rejected, which can cause undefined - // behavior. See #64 for details. - // - // FIXME(#67): Once our MSRV is > 1.64.0, remove this assertion. - #[allow(clippy::as_conversions)] - let max_alloc = (isize::MAX as usize).saturating_sub(align); - if size > max_alloc { - return Err(AllocError); - } - - // FIXME(https://github.com/rust-lang/rust/issues/55724): Use - // `Layout::repeat` once it's stabilized. - let layout = Layout::from_size_align(size, align).or(Err(AllocError))?; - - let ptr = if layout.size() != 0 { + let ptr = if size != 0 { + #[allow(clippy::as_conversions)] + if size > isize::MAX as usize { + return Err(AllocError); + } + // SAFETY: + // - `align` is derived from a `NonZeroUsize` and is thus non-zero. + // - `align` is a power of two because, by invariant on + // `KnownLayout::LAYOUT` `::LAYOUT` accurately + // reflects the layout of `T`. 
+ // - `size`, by invariant on `size_for_metadata`, is well-aligned for + // `align` and, by the above conditional, is at most `isize::MAX`. + let layout: Layout = unsafe { Layout::from_size_align_unchecked(size, align) }; // SAFETY: By contract on the caller, `allocate` is either // `alloc::alloc::alloc` or `alloc::alloc::alloc_zeroed`. The above // check ensures their shared safety precondition: that the supplied @@ -420,8 +415,6 @@ where None => return Err(AllocError), } } else { - let align = T::LAYOUT.align.get(); - // We use `transmute` instead of an `as` cast since Miri (with strict // provenance enabled) notices and complains that an `as` cast creates a // pointer with no provenance. Miri isn't smart enough to realize that @@ -435,8 +428,8 @@ where #[allow(unknown_lints)] #[allow(clippy::useless_transmute, integer_to_ptr_transmutes)] let dangling = unsafe { mem::transmute::(align) }; - // SAFETY: `dangling` is constructed from `T::LAYOUT.align`, which is a - // `NonZeroUsize`, which is guaranteed to be non-zero. + // SAFETY: `dangling` is constructed from `align`, which is derived from + // a `NonZeroUsize`, which is guaranteed to be non-zero. // // `Box<[T]>` does not allocate when `T` is zero-sized or when `len` is // zero, but it does require a non-null dangling pointer for its