From bbdf78c061b6e685d1992dcdeac692fc2f8f0d34 Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 1 Apr 2024 23:38:57 -0700 Subject: [PATCH 01/95] display opt: mem size labels and minor segment reporting changes for mem size labels: add one new "BUG" entry at the start of the array so `mem_size` does not need to be adjusted before being used to look up a string from the `MEM_SIZE_STRINGS` array. it's hard to measure the direct benefit of this, but it shrinks codegen size by a bit and simplifies a bit of assembly.... for segment reporting changes: stos/scas/lods do not actually need special segment override logic. instead, set their use of `es` when decoded, if appropriate. this is potentially ambiguous; in non-64bit modes the sequence `26aa` would decode as `stos` with explicit `es` prefix. this is now identical to simply decoding `aa`, which now also reports that there is an explicit `es` prefix even though there is no prefix on the instruction. on the other hand, the prefix-reported segment now more accurately describes the memory selector through which memory accesses will happen. seems ok? --- src/lib.rs | 5 +++-- src/long_mode/display.rs | 8 ++++++-- src/long_mode/mod.rs | 17 ++--------------- src/protected_mode/display.rs | 4 ++-- src/real_mode/display.rs | 4 ++-- test/long_mode/mod.rs | 2 +- 6 files changed, 16 insertions(+), 24 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 188a37a..a7b8531 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -140,7 +140,8 @@ pub use real_mode::Arch as x86_16; mod safer_unchecked; -const MEM_SIZE_STRINGS: [&'static str; 64] = [ +const MEM_SIZE_STRINGS: [&'static str; 65] = [ + "BUG", "byte", "word", "BUG", "dword", "ptr", "far", "BUG", "qword", "BUG", "mword", "BUG", "BUG", "BUG", "BUG", "BUG", "xmmword", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", @@ -194,7 +195,7 @@ impl MemoryAccessSize { /// "variable" accesses access a number of bytes dependent on the physical processor and its /// operating mode.
this is particularly relevant for `xsave`/`xrstor`-style instructions. pub fn size_name(&self) -> &'static str { - MEM_SIZE_STRINGS[self.size as usize - 1] + MEM_SIZE_STRINGS[self.size as usize] } } diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 9c6795e..c1c6c65 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -132,6 +132,10 @@ pub(crate) fn regspec_label(spec: &RegSpec) -> &'static str { unsafe { REG_NAMES.get_kinda_unchecked((spec.num as u16 + ((spec.bank as u16) << 3)) as usize) } } +pub(crate) fn mem_size_label(size: u8) -> &'static str { + unsafe { MEM_SIZE_STRINGS.get_kinda_unchecked(size as usize) } +} + impl fmt::Display for RegSpec { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(regspec_label(self)) @@ -3415,7 +3419,7 @@ fn contextualize_intel(instr: &Instruction, colors: } if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; + out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; out.write_str(" ")?; } @@ -3435,7 +3439,7 @@ fn contextualize_intel(instr: &Instruction, colors: out.write_str(", ")?; let x = Operand::from_spec(instr, instr.operands[i as usize]); if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; + out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; out.write_str(" ")?; } if let Some(prefix) = instr.segment_override_for_op(i) { diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 9bc9f0b..f58976a 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -4380,21 +4380,6 @@ impl Instruction { /// prefixes. 
pub fn segment_override_for_op(&self, op: u8) -> Option { match self.opcode { - Opcode::STOS | - Opcode::SCAS => { - if op == 0 { - Some(Segment::ES) - } else { - None - } - } - Opcode::LODS => { - if op == 1 { - Some(self.prefixes.segment) - } else { - None - } - } Opcode::MOVS => { if op == 0 { Some(Segment::ES) @@ -9002,6 +8987,7 @@ fn read_operands< } OperandCase::Yb_AL => { instruction.regs[0] = RegSpec::al(); + instruction.prefixes.segment = Segment::ES; if instruction.prefixes.address_size() { instruction.regs[1] = RegSpec::edi(); } else { @@ -9028,6 +9014,7 @@ fn read_operands< let bank = bank_from_prefixes_64(SizeCode::vqp, instruction.prefixes); instruction.regs[0].num = 0; instruction.regs[0].bank = bank; + instruction.prefixes.segment = Segment::ES; if instruction.prefixes.address_size() { instruction.regs[1] = RegSpec::edi(); } else { diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 55cab9e..89b7565 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -3436,7 +3436,7 @@ fn contextualize_intel(instr: &Instruction, colors: } if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; + out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; out.write_str(" ")?; } @@ -3456,7 +3456,7 @@ fn contextualize_intel(instr: &Instruction, colors: out.write_str(", ")?; let x = Operand::from_spec(instr, instr.operands[i as usize]); if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; + out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; out.write_str(" ")?; } if let Some(prefix) = instr.segment_override_for_op(i) { diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index 9450a39..6472c6c 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -3436,7 +3436,7 @@ fn contextualize_intel(instr: &Instruction, colors: } if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; + 
out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; out.write_str(" ")?; } @@ -3456,7 +3456,7 @@ fn contextualize_intel(instr: &Instruction, colors: out.write_str(", ")?; let x = Operand::from_spec(instr, instr.operands[i as usize]); if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; + out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; out.write_str(" ")?; } if let Some(prefix) = instr.segment_override_for_op(i) { diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index 8cb032c..b725287 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -2751,7 +2751,7 @@ fn only_64bit() { test_display(&[0xae], "scas byte es:[rdi], al"); test_display(&[0xaf], "scas dword es:[rdi], eax"); test_display(&[0x67, 0xaf], "scas dword es:[edi], eax"); - test_display(&[0x67, 0xac], "lods al, byte ds:[esi]"); + test_display(&[0x67, 0xac], "lods al, byte [esi]"); test_display(&[0x67, 0xaa], "stos byte es:[edi], al"); // note that rax.b does *not* change the register test_display(&[0x4f, 0xa0, 0x12, 0x34, 0x12, 0x34, 0x12, 0x34, 0x12, 0x34], "mov al, byte [0x3412341234123412]"); From 27c0d462eec5200be1e4cc5a24353a66b97c159c Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 2 Apr 2024 00:42:29 -0700 Subject: [PATCH 02/95] swap test order for segment override applicability it is almost always the case that self.prefixes.segment == Segment::DS, meaning testing for it first avoids checking `self.operands[op].is_memory()` later. 
this overall avoids a few instructions in the typical path, rather than checking `is_memory()` first (which would always be true in the places this function is called from) --- src/long_mode/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index f58976a..5bdd786 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -4400,8 +4400,8 @@ impl Instruction { }, _ => { // most operands are pretty simple: - if self.operands[op as usize].is_memory() && - self.prefixes.segment != Segment::DS { + if self.prefixes.segment != Segment::DS && + self.operands[op as usize].is_memory() { Some(self.prefixes.segment) } else { None From 3291884e4c51cfd93ae0f84681c5dd00e2e8544d Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 2 Apr 2024 00:54:58 -0700 Subject: [PATCH 03/95] display: gate rep printing with a simpler check testing against six opcodes to see if we should print rep or repnz is a bit absurd. they are relatively rare instructions, so this is a long sequence of never-taken tests. we can avoid the whole thing in the common case by testing if there is any kind of rep prefix at all. --- src/long_mode/display.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index c1c6c65..1a31073 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -3862,12 +3862,14 @@ impl ShowContextual Date: Tue, 2 Apr 2024 01:52:38 -0700 Subject: [PATCH 04/95] lets see how a visitor for operands works out here... 
--- src/long_mode/display.rs | 310 +++++++++++++++++++++++++++++++++++---- src/long_mode/mod.rs | 199 +++++++++++++++++++++++++ 2 files changed, 482 insertions(+), 27 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 1a31073..5237868 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -349,6 +349,257 @@ impl Colorize for Operand { } } +struct ColorizingOperandVisitor<'a, T, Y> { + instr: &'a Instruction, + op_nr: u8, + colors: &'a Y, + f: &'a mut T, +} + +impl crate::long_mode::OperandVisitor for ColorizingOperandVisitor<'_, T, Y> { + type Ok = (); + type Error = core::fmt::Error; + + fn visit_u8(&mut self, imm: u8) -> Result { + write!(self.f, "{}", self.colors.number(u8_hex(imm))) + } + fn visit_i8(&mut self, imm: i8) -> Result { + write!(self.f, "{}", + self.colors.number(signed_i8_hex(imm))) + } + fn visit_u16(&mut self, imm: u16) -> Result { + write!(self.f, "{}", self.colors.number(u16_hex(imm))) + } + fn visit_i16(&mut self, imm: i16) -> Result { + write!(self.f, "{}", + self.colors.number(signed_i16_hex(imm))) + } + fn visit_u32(&mut self, imm: u32) -> Result { + write!(self.f, "{}", self.colors.number(u32_hex(imm))) + } + fn visit_i32(&mut self, imm: i32) -> Result { + write!(self.f, "{}", + self.colors.number(signed_i32_hex(imm))) + } + fn visit_u64(&mut self, imm: u64) -> Result { + write!(self.f, "{}", self.colors.number(u64_hex(imm))) + } + fn visit_i64(&mut self, imm: i64) -> Result { + write!(self.f, "{}", + self.colors.number(signed_i64_hex(imm))) + } + fn visit_reg(&mut self, reg: RegSpec) -> Result { + self.f.write_str(regspec_label(&reg)) + } + fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { + self.f.write_str(regspec_label(&spec))?; + if mask.num != 0 { + self.f.write_str("{")?; + self.f.write_str(regspec_label(&mask))?; + self.f.write_str("}")?; + } + if let MergeMode::Zero = merge_mode { + self.f.write_str("{z}")?; + } + Ok(()) + } + fn
visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: crate::long_mode::SaeMode) -> Result { + self.f.write_str(regspec_label(&spec))?; + if mask.num != 0 { + self.f.write_str("{")?; + self.f.write_str(regspec_label(&mask))?; + self.f.write_str("}")?; + } + if let MergeMode::Zero = merge_mode { + self.f.write_str("{z}")?; + } + self.f.write_str(sae_mode.label())?; + Ok(()) + } + fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { + self.f.write_str(regspec_label(&spec))?; + if mask.num != 0 { + self.f.write_str("{")?; + self.f.write_str(regspec_label(&mask))?; + self.f.write_str("}")?; + } + if let MergeMode::Zero = merge_mode { + self.f.write_str("{z}")?; + } + self.f.write_str("{sae}")?; + Ok(()) + } + fn visit_abs_u32(&mut self, imm: u32) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { + write!(self.f, "{}:", prefix)?; + } + write!(self.f, "[{}]", self.colors.address(u32_hex(imm))) + } + fn visit_abs_u64(&mut self, imm: u64) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { + write!(self.f, "{}:", prefix)?; + } + write!(self.f, "[{}]", self.colors.address(u64_hex(imm))) + } + fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { + write!(self.f, "{}:", prefix)?; + } + write!(self.f, "[{} ", regspec_label(&reg))?; + format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + write!(self.f, "]") + } + fn visit_deref(&mut self, reg: RegSpec) -> Result { +
self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { + write!(self.f, "{}:", prefix)?; + } + self.f.write_str("[")?; + self.f.write_str(regspec_label(&reg))?; + self.f.write_str("]") + } + fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { + write!(self.f, "{}:", prefix)?; + } + write!(self.f, "[{} * {}]", + regspec_label(&reg), + self.colors.number(scale) + ) + } + fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { + write!(self.f, "{}:", prefix)?; + } + write!(self.f, "[{} * {} ", + regspec_label(&reg), + self.colors.number(scale), + )?; + format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + write!(self.f, "]") + } + fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { + write!(self.f, "{}:", prefix)?; + } + write!(self.f, "[{} + {} * {}]", + regspec_label(&base), + regspec_label(&index), + self.colors.number(scale) + ) + } + fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { + write!(self.f, "{}:", prefix)?; + } + write!(self.f, "[{} + {} * {} ", + regspec_label(&base), + regspec_label(&index), + self.colors.number(scale), +
)?; + format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + write!(self.f, "]") + } + fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + write!(self.f, "[{} ", regspec_label(&spec))?; + format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + write!(self.f, "]")?; + write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + } + fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + self.f.write_str("[")?; + self.f.write_str(regspec_label(&spec))?; + self.f.write_str("]")?; + write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + } + fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + write!(self.f, "[{} * {}]", + regspec_label(&spec), + self.colors.number(scale) + )?; + write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + } + fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + write!(self.f, "[{} * {} ", + regspec_label(&spec), + self.colors.number(scale), + )?; + format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + write!(self.f, "]")?; + write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + } + fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + self.f.write_str("[")?; + self.f.write_str(regspec_label(&base))?; + self.f.write_str(" + ")?; + 
self.f.write_str(regspec_label(&index))?; + self.f.write_str("]")?; + write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + } + fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + write!(self.f, "[{} + {} ", + regspec_label(&base), + regspec_label(&index), + )?; + format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + write!(self.f, "]")?; + write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + } + fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + write!(self.f, "[{} + {} * {}]", + regspec_label(&base), + regspec_label(&index), + self.colors.number(scale) + )?; + write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + } + fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + self.f.write_str(" ")?; + write!(self.f, "[{} + {} * {} ", + regspec_label(&base), + regspec_label(&index), + self.colors.number(scale), + )?; + format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + write!(self.f, "]")?; + write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + } + + fn visit_other(&mut self) -> Result { + Ok(()) + } +} + impl fmt::Display for Opcode { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(self.name()) @@ -3381,8 +3632,6 @@ fn contextualize_intel(instr: &Instruction, colors: if instr.operand_count > 0 { out.write_str(" ")?; - let x = Operand::from_spec(instr, instr.operands[0]); - const RELATIVE_BRANCHES: [Opcode; 21] = [ Opcode::JMP, Opcode::JRCXZ, Opcode::LOOP, Opcode::LOOPZ, Opcode::LOOPNZ, @@ -3398,6 +3647,8 @@ fn 
contextualize_intel(instr: &Instruction, colors: if instr.operands[0] == OperandSpec::ImmI8 || instr.operands[0] == OperandSpec::ImmI32 { if RELATIVE_BRANCHES.contains(&instr.opcode) { + let x = Operand::from_spec(instr, instr.operands[0]); + return match x { Operand::ImmediateI8(rel) => { if rel >= 0 { @@ -3418,15 +3669,13 @@ fn contextualize_intel(instr: &Instruction, colors: } } - if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; - out.write_str(" ")?; - } - - if let Some(prefix) = instr.segment_override_for_op(0) { - write!(out, "{}:", prefix)?; - } - x.colorize(colors, out)?; + let mut displayer = ColorizingOperandVisitor { + instr, + op_nr: 0, + f: out, + colors, + }; + instr.visit_operand(0 as u8, &mut displayer)?; for i in 1..instr.operand_count { match instr.opcode { @@ -3437,16 +3686,15 @@ fn contextualize_intel(instr: &Instruction, colors: }, _ => { out.write_str(", ")?; - let x = Operand::from_spec(instr, instr.operands[i as usize]); - if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; - out.write_str(" ")?; - } - if let Some(prefix) = instr.segment_override_for_op(i) { - write!(out, "{}:", prefix)?; - } - x.colorize(colors, out)?; + let mut displayer = ColorizingOperandVisitor { + instr, + op_nr: i, + f: out, + colors, + }; + instr.visit_operand(i as u8, &mut displayer)?; if let Some(evex) = instr.prefixes.evex() { + let x = Operand::from_spec(instr, instr.operands[i as usize]); if evex.broadcast() && x.is_memory() { let scale = if instr.opcode == Opcode::VCVTPD2PS || instr.opcode == Opcode::VCVTTPD2UDQ || instr.opcode == Opcode::VCVTPD2UDQ || instr.opcode == Opcode::VCVTUDQ2PD || instr.opcode == Opcode::VCVTPS2PD || instr.opcode == Opcode::VCVTQQ2PS || instr.opcode == Opcode::VCVTDQ2PD || instr.opcode == Opcode::VCVTTPD2DQ || instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VFPCLASSPD || instr.opcode == Opcode::VCVTNEPS2BF16 || instr.opcode == Opcode::VCVTUQQ2PS || instr.opcode == 
Opcode::VCVTPD2DQ || instr.opcode == Opcode::VCVTTPS2UQQ || instr.opcode == Opcode::VCVTPS2UQQ || instr.opcode == Opcode::VCVTTPS2QQ || instr.opcode == Opcode::VCVTPS2QQ { if instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VCVTNEPS2BF16 { @@ -3889,8 +4137,14 @@ impl ShowContextual ShowContextual { write!(out, ", ")?; - if let Some(prefix) = self.segment_override_for_op(1) { - write!(out, "{}:", prefix)?; - } - let x = Operand::from_spec(self, self.operands[i]); - x.colorize(colors, out)? + let mut displayer = ColorizingOperandVisitor { + instr: self, + op_nr: i as u8, + f: out, + colors, + }; + self.visit_operand(i as u8, &mut displayer)?; } } } diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 5bdd786..7aab852 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -541,6 +541,43 @@ impl SaeMode { SAE_MODES[idx] } } + +pub trait OperandVisitor { + type Ok; + type Error; + + fn visit_reg(&mut self, reg: RegSpec) -> Result; + fn visit_deref(&mut self, reg: RegSpec) -> Result; + fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result; + fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result; + fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result; + fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result; + fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result; + fn visit_i8(&mut self, imm: i8) -> Result; + fn visit_u8(&mut self, imm: u8) -> Result; + fn visit_i16(&mut self, imm: i16) -> Result; + fn visit_u16(&mut self, imm: u16) -> Result; + fn visit_i32(&mut self, imm: i32) -> Result; + fn visit_u32(&mut self, imm: u32) -> Result; + fn visit_i64(&mut self, imm: i64) -> Result; + fn visit_u64(&mut self, imm: u64) -> Result; + fn visit_abs_u32(&mut self, imm: u32) -> Result; + fn visit_abs_u64(&mut self, imm: u64) -> Result; + fn visit_reg_mask_merge(&mut self, base: RegSpec, mask: RegSpec, merge_mode: 
MergeMode) -> Result; + fn visit_reg_mask_merge_sae(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: SaeMode) -> Result; + fn visit_reg_mask_merge_sae_noround(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result; + fn visit_reg_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_reg_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result; + fn visit_reg_scale_masked(&mut self, base: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; + fn visit_reg_scale_disp_masked(&mut self, base: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result; + fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; + fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; + + fn visit_other(&mut self) -> Result; +} + impl Operand { fn from_spec(inst: &Instruction, spec: OperandSpec) -> Operand { match spec { @@ -4323,6 +4360,168 @@ impl Instruction { Operand::from_spec(self, self.operands[i as usize]) } + /// TODO: make public, document, etc... + /// + /// `visit_operand` allows code using operands to better specialize and inline with the logic + /// that would construct an [`Operand`] variant, without having to necessarily construct an + /// `Operand` (including the attendant move of the enum). + /// + /// if the work you expect to do per-operand is very small, constructing an `Operand` and + /// dispatching on tags may be a substantial factor of overall runtime. `visit_operand` can + /// reduce total overhead in such cases. 
+ fn visit_operand(&self, i: u8, visitor: &mut T) -> Result { + assert!(i < 4); + let spec = self.operands[i as usize]; + match spec { + OperandSpec::Nothing => { + visitor.visit_other() + } + OperandSpec::RegRRR => { + visitor.visit_reg(self.regs[0]) + } + OperandSpec::RegMMM => { + visitor.visit_reg(self.regs[1]) + } + OperandSpec::RegVex => { + visitor.visit_reg(self.regs[3]) + } + OperandSpec::Reg4 => { + visitor.visit_reg(RegSpec { num: self.imm as u8, bank: self.regs[3].bank }) + } + OperandSpec::Deref => { + visitor.visit_deref(self.regs[1]) + } + OperandSpec::Deref_esi => { +// visitor.visit_other() + visitor.visit_deref(RegSpec::esi()) + } + OperandSpec::Deref_edi => { +// visitor.visit_other() + visitor.visit_deref(RegSpec::edi()) + } + OperandSpec::Deref_rsi => { +// visitor.visit_other() + visitor.visit_deref(RegSpec::rsi()) + } + OperandSpec::Deref_rdi => { +// visitor.visit_other() + visitor.visit_deref(RegSpec::rdi()) + } + OperandSpec::RegDisp => { + visitor.visit_disp(self.regs[1], self.disp as i32) + } + OperandSpec::RegRRR_maskmerge => { + visitor.visit_reg_mask_merge( + self.regs[0], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::RegRRR_maskmerge_sae => { + visitor.visit_reg_mask_merge_sae( + self.regs[0], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + SaeMode::from(self.prefixes.evex_unchecked().vex().l(), self.prefixes.evex_unchecked().lp()), + ) + } + OperandSpec::RegRRR_maskmerge_sae_noround => { + visitor.visit_reg_mask_merge_sae_noround( + self.regs[0], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::RegMMM_maskmerge => { + visitor.visit_reg_mask_merge( + self.regs[1], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + 
MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::RegMMM_maskmerge_sae_noround => { + visitor.visit_reg_mask_merge_sae_noround( + self.regs[1], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::RegVex_maskmerge => { + visitor.visit_reg_mask_merge( + self.regs[3], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::ImmI8 => visitor.visit_i8(self.imm as i8), + OperandSpec::ImmU8 => visitor.visit_u8(self.imm as u8), + OperandSpec::ImmI16 => visitor.visit_i16(self.imm as i16), + OperandSpec::ImmU16 => visitor.visit_u16(self.imm as u16), + OperandSpec::ImmI32 => visitor.visit_i32(self.imm as i32), + OperandSpec::ImmI64 => visitor.visit_i64(self.imm as i64), + OperandSpec::ImmInDispField => visitor.visit_u16(self.disp as u16), + OperandSpec::DispU32 => visitor.visit_abs_u32(self.disp as u32), + OperandSpec::DispU64 => visitor.visit_abs_u64(self.disp as u64), + OperandSpec::RegScale => { + visitor.visit_reg_scale(self.regs[2], self.scale) + } + OperandSpec::RegScaleDisp => { + visitor.visit_reg_scale_disp(self.regs[2], self.scale, self.disp as i32) + } + OperandSpec::RegIndexBaseScale => { + visitor.visit_index_base_scale(self.regs[1], self.regs[2], self.scale) + /* + Operand::RegIndexBaseScale(self.regs[1], self.regs[2], self.scale) + */ + } + OperandSpec::RegIndexBaseScaleDisp => { + visitor.visit_index_base_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) + } + OperandSpec::Deref_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_reg_deref_masked(self.regs[1], RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_deref(self.regs[1]) + } + } + OperandSpec::RegDisp_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_reg_disp_masked(self.regs[1], self.disp 
as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_disp(self.regs[1], self.disp as i32) + } + } + OperandSpec::RegScale_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_reg_scale_masked(self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_reg_scale(self.regs[2], self.scale) + } + } + OperandSpec::RegScaleDisp_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_reg_scale_disp_masked(self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_reg_scale_disp(self.regs[2], self.scale, self.disp as i32) + } + } + OperandSpec::RegIndexBaseScale_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_index_base_scale_masked(self.regs[1], self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_index_base_scale(self.regs[1], self.regs[2], self.scale) + } + } + OperandSpec::RegIndexBaseScaleDisp_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_index_base_scale_disp_masked(self.regs[1], self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_index_base_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) + } + } + } + } + /// get the number of operands in this instruction. useful in iterating an instruction's /// operands generically. 
pub fn operand_count(&self) -> u8 { From ed4f238a4c2d860e6fadc8abeaa0cba36ed1df8a Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 2 Apr 2024 02:05:02 -0700 Subject: [PATCH 05/95] less write, more write_str --- src/long_mode/display.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 5237868..644bd82 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -449,11 +449,14 @@ impl crate::long_mode::OperandVisitor for Colorizi self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; self.f.write_str(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - write!(self.f, "{}:", prefix)?; + write!(self.f, "{}", prefix)?; + self.f.write_str(":")?; } - write!(self.f, "[{} ", regspec_label(&reg))?; + self.f.write_str("[")?; + self.f.write_str(regspec_label(&reg))?; + self.f.write_str(" ")?; format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(self.f, "]") + self.f.write_str("]") } fn visit_deref(&mut self, reg: RegSpec) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; From 050bc1c972bc69b963429753b939cefc04812321 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 2 Apr 2024 02:13:03 -0700 Subject: [PATCH 06/95] display: remove some pointless checks the match on opcode should have been dce, match on operands would only matter if there was a bug --- src/long_mode/display.rs | 119 ++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 64 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 644bd82..b6cca48 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -3681,72 +3681,63 @@ fn contextualize_intel(instr: &Instruction, colors: instr.visit_operand(0 as u8, &mut displayer)?; for i in 1..instr.operand_count { - match instr.opcode { - _ => { - match &instr.operands[i as usize] { - &OperandSpec::Nothing => { -
return Ok(()); - }, - _ => { - out.write_str(", ")?; - let mut displayer = ColorizingOperandVisitor { - instr, - op_nr: i, - f: out, - colors, - }; - instr.visit_operand(i as u8, &mut displayer)?; - if let Some(evex) = instr.prefixes.evex() { - let x = Operand::from_spec(instr, instr.operands[i as usize]); - if evex.broadcast() && x.is_memory() { - let scale = if instr.opcode == Opcode::VCVTPD2PS || instr.opcode == Opcode::VCVTTPD2UDQ || instr.opcode == Opcode::VCVTPD2UDQ || instr.opcode == Opcode::VCVTUDQ2PD || instr.opcode == Opcode::VCVTPS2PD || instr.opcode == Opcode::VCVTQQ2PS || instr.opcode == Opcode::VCVTDQ2PD || instr.opcode == Opcode::VCVTTPD2DQ || instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VFPCLASSPD || instr.opcode == Opcode::VCVTNEPS2BF16 || instr.opcode == Opcode::VCVTUQQ2PS || instr.opcode == Opcode::VCVTPD2DQ || instr.opcode == Opcode::VCVTTPS2UQQ || instr.opcode == Opcode::VCVTPS2UQQ || instr.opcode == Opcode::VCVTTPS2QQ || instr.opcode == Opcode::VCVTPS2QQ { - if instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VCVTNEPS2BF16 { - if evex.vex().l() { - 8 - } else if evex.lp() { - 16 - } else { - 4 - } - } else if instr.opcode == Opcode::VFPCLASSPD { - if evex.vex().l() { - 4 - } else if evex.lp() { - 8 - } else { - 2 - } - } else { - // vcvtpd2ps is "cool": in broadcast mode, it can read a - // double-precision float (qword), resize to single-precision, - // then broadcast that to the whole destination register. this - // means we need to show `xmm, qword [addr]{1to4}` if vector - // size is 256. likewise, scale of 8 for the same truncation - // reason if vector size is 512. - // vcvtudq2pd is the same story. - // vfpclassp{s,d} is a mystery to me. - if evex.vex().l() { - 4 - } else if evex.lp() { - 8 - } else { - 2 - } - } - } else { - // this should never be `None` - that would imply two - // memory operands for a broadcasted operation. 
- if let Some(width) = Operand::from_spec(instr, instr.operands[i as usize - 1]).width() { - width / instr.mem_size - } else { - 0 - } - }; - write!(out, "{{1to{}}}", scale)?; - } + // don't worry about checking for `instr.operands[i] != Nothing`, it would be a bug to + // reach that while iterating only to `operand_count`.. + out.write_str(", ")?; + let mut displayer = ColorizingOperandVisitor { + instr, + op_nr: i, + f: out, + colors, + }; + instr.visit_operand(i as u8, &mut displayer)?; + if let Some(evex) = instr.prefixes.evex() { + let x = Operand::from_spec(instr, instr.operands[i as usize]); + if evex.broadcast() && x.is_memory() { + let scale = if instr.opcode == Opcode::VCVTPD2PS || instr.opcode == Opcode::VCVTTPD2UDQ || instr.opcode == Opcode::VCVTPD2UDQ || instr.opcode == Opcode::VCVTUDQ2PD || instr.opcode == Opcode::VCVTPS2PD || instr.opcode == Opcode::VCVTQQ2PS || instr.opcode == Opcode::VCVTDQ2PD || instr.opcode == Opcode::VCVTTPD2DQ || instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VFPCLASSPD || instr.opcode == Opcode::VCVTNEPS2BF16 || instr.opcode == Opcode::VCVTUQQ2PS || instr.opcode == Opcode::VCVTPD2DQ || instr.opcode == Opcode::VCVTTPS2UQQ || instr.opcode == Opcode::VCVTPS2UQQ || instr.opcode == Opcode::VCVTTPS2QQ || instr.opcode == Opcode::VCVTPS2QQ { + if instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VCVTNEPS2BF16 { + if evex.vex().l() { + 8 + } else if evex.lp() { + 16 + } else { + 4 + } + } else if instr.opcode == Opcode::VFPCLASSPD { + if evex.vex().l() { + 4 + } else if evex.lp() { + 8 + } else { + 2 + } + } else { + // vcvtpd2ps is "cool": in broadcast mode, it can read a + // double-precision float (qword), resize to single-precision, + // then broadcast that to the whole destination register. this + // means we need to show `xmm, qword [addr]{1to4}` if vector + // size is 256. likewise, scale of 8 for the same truncation + // reason if vector size is 512. + // vcvtudq2pd is the same story. 
+ // vfpclassp{s,d} is a mystery to me. + if evex.vex().l() { + 4 + } else if evex.lp() { + 8 + } else { + 2 } } - } + } else { + // this should never be `None` - that would imply two + // memory operands for a broadcasted operation. + if let Some(width) = Operand::from_spec(instr, instr.operands[i as usize - 1]).width() { + width / instr.mem_size + } else { + 0 + } + }; + write!(out, "{{1to{}}}", scale)?; } } } From 214da3dc5cbea216f5a3eb601a46e882bbf69a92 Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 13 Jun 2024 15:33:21 -0700 Subject: [PATCH 07/95] use a bit of Opcode to indicate rep/repne applicability this reduces a `slice::contains` to a single bit test, and regroups prefix printing to deduplicate checks of the `rep` prefix seemingly this reduces instruction counts by about 1%, cycles by 0.3% or so. --- src/long_mode/display.rs | 18 +++++++++--------- src/long_mode/mod.rs | 30 +++++++++++++++++++++--------- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index b6cca48..71bb1dd 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -638,6 +638,13 @@ const MNEMONICS: &[&'static str] = &[ "not", "xadd", "xchg", + "cmps", + "scas", + "movs", + "lods", + "stos", + "ins", + "outs", "invalid", "bt", "bsf", @@ -719,17 +726,10 @@ const MNEMONICS: &[&'static str] = &[ "cwd", "cdq", "cqo", - "lods", - "stos", "lahf", "sahf", - "cmps", - "scas", - "movs", "test", - "ins", "in", - "outs", "out", "imul", "jo", @@ -3613,7 +3613,7 @@ fn contextualize_intel(instr: &Instruction, colors: } if instr.prefixes.rep_any() { - if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&instr.opcode) { + if instr.opcode.can_rep() { if instr.prefixes.rep() { write!(out, "rep ")?; } else if instr.prefixes.repnz() { @@ -3769,7 +3769,7 @@ fn contextualize_c(instr: &Instruction, colors: &Y, } if instr.prefixes.rep_any() { - if [Opcode::MOVS, Opcode::CMPS, 
Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&instr.opcode) { + if instr.opcode.can_rep() { let word_str = match instr.mem_size { 1 => "byte", 2 => "word", diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 7aab852..41d6f2d 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -1101,7 +1101,16 @@ pub enum Opcode { NOT = 0x1019, XADD = 0x101a, XCHG = 0x101b, - Invalid = 0x1c, + + CMPS = 0x201c, + SCAS = 0x201d, + MOVS = 0x201e, + LODS = 0x201f, + STOS = 0x2020, + INS = 0x2021, + OUTS = 0x2022, + + Invalid = 0x23, // XADD, BT, // BTS, @@ -1186,17 +1195,10 @@ pub enum Opcode { CWD, CDQ, CQO, - LODS, - STOS, LAHF, SAHF, - CMPS, - SCAS, - MOVS, TEST, - INS, IN, - OUTS, OUT, IMUL, JO, @@ -4338,6 +4340,16 @@ impl Opcode { _ => None, } } + + #[inline(always)] + fn can_lock(&self) -> bool { + (*self as u32) & 0x1000 != 0 + } + + #[inline(always)] + fn can_rep(&self) -> bool { + (*self as u32) & 0x2000 != 0 + } } impl Default for Instruction { @@ -6835,7 +6847,7 @@ fn read_with_annotations< self.read_operands(decoder, words, instruction, record, sink)?; if self.check_lock { - if (instruction.opcode as u32) < 0x1000 || !instruction.operands[0].is_memory() { + if !instruction.opcode.can_lock() || !instruction.operands[0].is_memory() { return Err(DecodeError::InvalidPrefixes); } } From fe2917be66f1ac0c138ab6f5628f6b73e7626952 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 16 Jun 2024 12:08:41 -0700 Subject: [PATCH 08/95] adapting contextualize_intel to use new operand visitor stuff the reasoning for *why* `visit_operand` is better here lives as doc comments on `visit_operand` itself: it avoids going from scattered operand details to `enum Operand` only to deconstruct the enum again. instead, branch arms can get codegen'd directly against `struct Instruction` layout. 
--- src/long_mode/display.rs | 250 +++++++++++++++++++++++++++++++++------ 1 file changed, 213 insertions(+), 37 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 71bb1dd..3cfab94 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -3595,6 +3595,20 @@ impl <'instr, T: fmt::Write, Y: YaxColors> Colorize for InstructionDisplay /// No per-operand context when contextualizing an instruction! struct NoContext; +// TODO: find a better place to put this.... +fn c_to_hex(c: u8) -> u8 { + /* + static CHARSET: &'static [u8; 16] = b"0123456789abcdef"; + CHARSET[c as usize] + */ + // the conditional branch below is faster than a lookup, yes + if c < 10 { + b'0' + c + } else { + b'a' + c - 10 + } +} + impl Instruction { pub fn write_to(&self, out: &mut T) -> fmt::Result { self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) @@ -3615,9 +3629,9 @@ fn contextualize_intel(instr: &Instruction, colors: if instr.prefixes.rep_any() { if instr.opcode.can_rep() { if instr.prefixes.rep() { - write!(out, "rep ")?; + out.write_str("rep ")?; } else if instr.prefixes.repnz() { - write!(out, "repnz ")?; + out.write_str("repnz ")?; } } } @@ -3635,41 +3649,12 @@ fn contextualize_intel(instr: &Instruction, colors: if instr.operand_count > 0 { out.write_str(" ")?; - const RELATIVE_BRANCHES: [Opcode; 21] = [ - Opcode::JMP, Opcode::JRCXZ, - Opcode::LOOP, Opcode::LOOPZ, Opcode::LOOPNZ, - Opcode::JO, Opcode::JNO, - Opcode::JB, Opcode::JNB, - Opcode::JZ, Opcode::JNZ, - Opcode::JNA, Opcode::JA, - Opcode::JS, Opcode::JNS, - Opcode::JP, Opcode::JNP, - Opcode::JL, Opcode::JGE, - Opcode::JLE, Opcode::JG, - ]; - - if instr.operands[0] == OperandSpec::ImmI8 || instr.operands[0] == OperandSpec::ImmI32 { - if RELATIVE_BRANCHES.contains(&instr.opcode) { - let x = Operand::from_spec(instr, instr.operands[0]); - - return match x { - Operand::ImmediateI8(rel) => { - if rel >= 0 { - write!(out, "$+{}", 
colors.number(signed_i32_hex(rel as i32))) - } else { - write!(out, "${}", colors.number(signed_i32_hex(rel as i32))) - } - } - Operand::ImmediateI32(rel) => { - if rel >= 0 { - write!(out, "$+{}", colors.number(signed_i32_hex(rel))) - } else { - write!(out, "${}", colors.number(signed_i32_hex(rel))) - } - } - _ => { unreachable!() } - }; - } + if instr.visit_operand(0, &mut RelativeBranchPrinter { + inst: instr, + colors, + out, + })? { + return Ok(()); } let mut displayer = ColorizingOperandVisitor { @@ -4167,3 +4152,194 @@ impl ShowContextual { + inst: &'a Instruction, + colors: &'a Y, + out: &'a mut F, +} + +impl<'a, Y: YaxColors, F: fmt::Write> crate::long_mode::OperandVisitor for RelativeBranchPrinter<'a, Y, F> { + // return true if we printed a relative branch offset, false otherwise + type Ok = bool; + // but errors are errors + type Error = fmt::Error; + + fn visit_reg(&mut self, _reg: RegSpec) -> Result { + Ok(false) + } + fn visit_deref(&mut self, _reg: RegSpec) -> Result { + Ok(false) + } + fn visit_disp(&mut self, _reg: RegSpec, _disp: i32) -> Result { + Ok(false) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i8(&mut self, rel: i8) -> Result { + if RELATIVE_BRANCHES.contains(&self.inst.opcode) { + self.out.write_char('$')?; + // danger_anguished_string_write(self.out, "$"); + let mut v = rel as u8; + if rel < 0 { + self.out.write_char('-')?; + //danger_anguished_string_write(&mut self.out, "-"); + v = -rel as u8; + } else { + self.out.write_char('+')?; + // danger_anguished_string_write(&mut self.out, "+"); + } + self.out.write_str("0x")?; + // danger_anguished_string_write(self.out, "0x"); + let mut buf = [core::mem::MaybeUninit::::uninit(); 2]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) + }; + + 
self.out.write_str(s)?; +// anguished_string_write(&mut self.out, s); + Ok(true) + } else { + Ok(false) + } + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i32(&mut self, rel: i32) -> Result { + if RELATIVE_BRANCHES.contains(&self.inst.opcode) || self.inst.opcode == Opcode::XBEGIN { + self.out.write_char('$')?; + // danger_anguished_string_write(self.out, "$"); + let mut v = rel as u32; + if rel < 0 { + self.out.write_char('-')?; + // danger_anguished_string_write(&mut self.out, "-"); + v = -rel as u32; + } else { + self.out.write_char('+')?; + // danger_anguished_string_write(&mut self.out, "+"); + } + self.out.write_str("0x")?; + // danger_anguished_string_write(self.out, "0x"); + let mut buf = [core::mem::MaybeUninit::::uninit(); 8]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) + }; + +// danger_anguished_string_write(&mut self.out, s); + + // danger_anguished_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); + self.out.write_str(s)?; + Ok(true) + } else { + Ok(false) + } + } + fn visit_u8(&mut self, _imm: u8) -> Result { + Ok(false) + } + fn visit_i16(&mut self, _imm: i16) -> Result { + Ok(false) + } + fn visit_u16(&mut self, _imm: u16) -> Result { + Ok(false) + } + fn visit_u32(&mut self, _imm: u32) -> Result { + Ok(false) + } + fn visit_i64(&mut self, _imm: i64) -> Result { + Ok(false) + } + fn visit_u64(&mut self, _imm: u64) -> Result { + Ok(false) + } + fn visit_abs_u32(&mut self, _imm: u32) -> Result { + Ok(false) + } + fn visit_abs_u64(&mut self, _imm: u64) -> Result { + Ok(false) + } + fn visit_reg_scale(&mut self, _reg: RegSpec, _scale: u8) -> Result { + Ok(false) + } + fn visit_index_base_scale(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8) -> Result { + Ok(false) + } 
+ fn visit_reg_scale_disp(&mut self, _reg: RegSpec, _scale: u8, _disp: i32) -> Result { + Ok(false) + } + fn visit_index_base_scale_disp(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32) -> Result { + Ok(false) + } + fn visit_other(&mut self) -> Result { + Ok(false) + } + fn visit_reg_mask_merge(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode) -> Result { + Ok(false) + } + fn visit_reg_mask_merge_sae(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode, _sae_mode: crate::long_mode::SaeMode) -> Result { + Ok(false) + } + fn visit_reg_mask_merge_sae_noround(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode) -> Result { + Ok(false) + } + fn visit_reg_disp_masked(&mut self, _spec: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_reg_deref_masked(&mut self, _spec: RegSpec, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_reg_scale_masked(&mut self, _spec: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_reg_scale_disp_masked(&mut self, _spec: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_masked(&mut self, _base: RegSpec, _index: RegSpec, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_scale_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_scale_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { + Ok(false) + } +} + From ead58f66b9818c1ac01232b1e137c0d2a6ea06a2 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 16 Jun 2024 12:23:05 -0700 Subject: [PATCH 09/95] commit unshippable wildly unsafe asm-filled printing code write_2 will never actually be used, but im adapting it into 
contextualize in a... better way --- src/long_mode/display.rs | 1323 ++++++++++++++++++++++++++++++++++++++ test/long_mode/mod.rs | 13 + 2 files changed, 1336 insertions(+) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 3cfab94..ba20c1c 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -89,6 +89,19 @@ impl fmt::Display for PrefixRex { } } +impl Segment { + fn name(&self) -> &'static [u8; 2] { + match self { + Segment::CS => b"cs", + Segment::DS => b"ds", + Segment::ES => b"es", + Segment::FS => b"fs", + Segment::GS => b"gs", + Segment::SS => b"ss", + } + } +} + impl fmt::Display for Segment { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -3595,6 +3608,54 @@ impl <'instr, T: fmt::Write, Y: YaxColors> Colorize for InstructionDisplay /// No per-operand context when contextualizing an instruction! struct NoContext; +extern crate alloc; + +trait Writable { + unsafe fn as_mut_vec(&mut self) -> &mut alloc::vec::Vec; + fn into_inner(self) -> T; +} + +impl Writable for alloc::string::String { + unsafe fn as_mut_vec(&mut self) -> &mut alloc::vec::Vec { + self.as_mut_vec() + } + fn into_inner(self) -> alloc::string::String { + self + } +} + +struct BigEnoughString { + content: alloc::string::String, +} + +impl Writable for BigEnoughString { + unsafe fn as_mut_vec(&mut self) -> &mut alloc::vec::Vec { + self.content.as_mut_vec() + } + fn into_inner(self) -> alloc::string::String { + self.content + } +} + +impl BigEnoughString { + pub fn from_string(mut s: alloc::string::String) -> Self { + s.reserve(256); + // safety: the string is large enough + unsafe { Self::from_string_unchecked(s) } + } + + pub fn new() -> Self { + Self::from_string(alloc::string::String::new()) + } + + /// safety: CALLER MUST ENSURE S IS LARGE ENOUGH TO HOLD ANY DISASSEMBLED x86 INSTRUCTION + unsafe fn from_string_unchecked(s: alloc::string::String) -> Self { + Self { + content: s + } + } +} + // TODO: find a better place to put 
this.... fn c_to_hex(c: u8) -> u8 { /* @@ -3610,6 +3671,1268 @@ fn c_to_hex(c: u8) -> u8 { } impl Instruction { +// pub fn write_2>(&self, out: &mut alloc::string::String) -> fmt::Result { + #[cfg_attr(feature="profiling", inline(never))] + pub fn write_2(&self, out: &mut alloc::string::String) -> fmt::Result { + use core::fmt::Write; + + unsafe { out.as_mut_vec().reserve(64) }; + + fn anguished_string_write(out: &mut alloc::string::String, label: &str) { + let new_bytes = label.as_bytes(); + let buf = unsafe { out.as_mut_vec() }; + anguished_bstring_write(buf, new_bytes); + } + fn anguished_bstring_write(buf: &mut alloc::vec::Vec, new_bytes: &[u8]) { + if new_bytes.len() >= 32 { + unsafe { core::hint::unreachable_unchecked() } + } + buf.reserve(new_bytes.len()); + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); + } + } + unsafe { + buf.set_len(buf.len() + new_bytes.len()); + } + } + + fn danger_anguished_string_write(out: &mut alloc::string::String, label: &str) { + let new_bytes = label.as_bytes(); + let buf = unsafe { out.as_mut_vec() }; + danger_anguished_bstring_write(buf, new_bytes); + } + fn danger_anguished_bstring_write(buf: &mut alloc::vec::Vec, new_bytes: &[u8]) { + if new_bytes.len() >= 16 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let mut rem = new_bytes.len() as isize; + unsafe { + buf.set_len(buf.len() + new_bytes.len()); + } + /* + while rem % 4 > 0 { + dest.offset(rem - 1).write_unaligned(src.offset(rem - 1).read_unaligned()); + rem -= 1; + } + + while rem > 0 { + (dest.offset(rem - 4) as *mut u32).write_unaligned(unsafe { + *core::mem::transmute::<&u8, &u32>(&new_bytes[rem as usize - 4]) + }); + rem -= 4; + } + */ + /* + unsafe { + core::arch::asm!( + "7:", + "cmp {rem}, 4", + "jb 8f", + "sub {rem}, 4", + "mov {buf:e}, dword ptr [{src} + 
{rem}]", + "mov dword ptr [{dest} + {rem}], {buf:e}", + "jmp 7b", + "8:", + "test {rem}, {rem}", + "jz 10f", + "sub {rem}, 1", + "mov {buf:l}, byte ptr [{src} + {rem}]", + "mov byte ptr [{dest} + {rem}], {buf:l}", + "jnz 8b", + "10:", + src = in(reg) src, + dest = in(reg) dest, + rem = in(reg) rem, +// tmp = out(reg) _, + buf = out(reg) _, + options(nostack), + ); + } + */ + /* + unsafe { + core::arch::asm!( + "mov {tmp}, {rem}", + "and {tmp}, 3", + "je 3f", + "sub {rem}, {tmp}", + "2:", + "mov {buf:l}, byte ptr [{src}]", + "mov byte ptr [{dest}], {buf:l}", + "add {src}, 1", + "add {dest}, 1", + "sub {tmp}, 1", + "jnz 2b", + "3:", + "test {rem}, {rem}", + "jz 5f", + "4:", + "sub {rem}, 4", + "mov {buf:e}, dword ptr [{src} + {rem}]", + "mov dword ptr [{dest} + {rem}], {buf:e}", + "jnz 4b", + "5:", + src = in(reg) src, + dest = in(reg) dest, + rem = in(reg) rem, + tmp = out(reg) _, + buf = out(reg) _, + ); + } + */ + /* + */ + dest.offset(0 as isize).write(new_bytes[0]); + for i in 1..new_bytes.len() { + unsafe { + dest.offset(i as isize).write(new_bytes[i]); + } + } + // } + } + } + + fn danger_anguished_variable_length_string_write(out: &mut alloc::string::String, label: &str) { + let new_bytes = label.as_bytes(); + let buf = unsafe { out.as_mut_vec() }; + danger_anguished_variable_length_bstring_write(buf, new_bytes); + } + #[cfg_attr(feature="profiling", inline(never))] + fn danger_anguished_variable_length_bstring_write(buf: &mut alloc::vec::Vec, new_bytes: &[u8]) { + if new_bytes.len() >= 16 { + unsafe { core::hint::unreachable_unchecked() } + } + if new_bytes.len() == 0 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let mut rem = new_bytes.len() as isize; + unsafe { + buf.set_len(buf.len() + new_bytes.len()); + } + /* + while rem % 4 > 0 { + dest.offset(rem - 1).write_unaligned(src.offset(rem - 1).read_unaligned()); + rem -= 1; + } + + while rem > 
0 { + (dest.offset(rem - 4) as *mut u32).write_unaligned(unsafe { + *core::mem::transmute::<&u8, &u32>(&new_bytes[rem as usize - 4]) + }); + rem -= 4; + } + */ + unsafe { + /* + if rem >= 8 { + rem -= 8; + (dest.offset(rem) as *mut u64).write_unaligned((src.offset(rem) as *const u64).read_unaligned()) + } + if rem >= 4 { + rem -= 4; + (dest.offset(rem) as *mut u32).write_unaligned((src.offset(rem) as *const u32).read_unaligned()); + if rem == 0 { + return; + } + } + if rem >= 2 { + rem -= 2; + (dest.offset(rem) as *mut u16).write_unaligned((src.offset(rem) as *const u16).read_unaligned()); + if rem == 0 { + return; + } + } + if rem >= 1 { + rem -= 1; + (dest.offset(rem) as *mut u8).write_unaligned((src.offset(rem) as *const u8).read_unaligned()) + } + */ + core::arch::asm!( + "7:", + "cmp {rem:e}, 8", + "jb 8f", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 8", + "jz 11f", + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 11f", + "9:", + "cmp {rem:e}, 2", + "jb 10f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 11f", + "10:", + "cmp {rem:e}, 1", + "jb 11f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "11:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, +// tmp = out(reg) _, + buf = out(reg) _, + options(nostack), + ); + } + /* + unsafe { + core::arch::asm!( + "7:", + "cmp {rem:e}, 4", + "jb 8f", + "sub {rem:e}, 4", + "mov {buf:e}, dword ptr [{src} + {rem}]", + "mov dword ptr [{dest} + {rem}], {buf:e}", + "jmp 7b", + "8:", + "test {rem:e}, {rem:e}", + "jz 10f", + "sub {rem:e}, 1", + "mov {buf:l}, byte ptr [{src} + {rem}]", + "mov byte ptr [{dest} + {rem}], {buf:l}", + "jnz 8b", + "10:", + src = in(reg) src, + dest = in(reg) dest, + rem = 
in(reg) rem, +// tmp = out(reg) _, + buf = out(reg) _, + options(nostack), + ); + } + */ + /* + unsafe { + core::arch::asm!( + "mov {tmp}, {rem}", + "and {tmp}, 3", + "je 3f", + "sub {rem}, {tmp}", + "2:", + "mov {buf:l}, byte ptr [{src}]", + "mov byte ptr [{dest}], {buf:l}", + "add {src}, 1", + "add {dest}, 1", + "sub {tmp}, 1", + "jnz 2b", + "3:", + "test {rem}, {rem}", + "jz 5f", + "4:", + "sub {rem}, 4", + "mov {buf:e}, dword ptr [{src} + {rem}]", + "mov dword ptr [{dest} + {rem}], {buf:e}", + "jnz 4b", + "5:", + src = in(reg) src, + dest = in(reg) dest, + rem = in(reg) rem, + tmp = out(reg) _, + buf = out(reg) _, + ); + } + */ + /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); + } + } + */ + } + } + fn danger_anguished_smaller_variable_length_bstring_write(buf: &mut alloc::vec::Vec, new_bytes: &[u8]) { + if new_bytes.len() >= 8 { + unsafe { core::hint::unreachable_unchecked() } + } + if new_bytes.len() == 0 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let mut rem = new_bytes.len() as isize; + unsafe { + buf.set_len(buf.len() + new_bytes.len()); + } + /* + while rem % 4 > 0 { + dest.offset(rem - 1).write_unaligned(src.offset(rem - 1).read_unaligned()); + rem -= 1; + } + + while rem > 0 { + (dest.offset(rem - 4) as *mut u32).write_unaligned(unsafe { + *core::mem::transmute::<&u8, &u32>(&new_bytes[rem as usize - 4]) + }); + rem -= 4; + } + */ + unsafe { + /* + if rem >= 4 { + rem -= 4; + (dest.offset(rem as isize) as *mut u32).write_unaligned((src.offset(rem as isize) as *const u32).read_unaligned()); + if rem == 0 { + return; + } + } + if rem >= 2 { + rem -= 2; + (dest.offset(rem as isize) as *mut u16).write_unaligned((src.offset(rem as isize) as *const u16).read_unaligned()); + if rem == 0 { + return; + } + } + if rem >= 1 { + rem -= 1; + (dest.offset(rem 
as isize) as *mut u8).write_unaligned((src.offset(rem as isize) as *const u8).read_unaligned()) + } + */ + core::arch::asm!( + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 11f", + "9:", + "cmp {rem:e}, 2", + "jb 10f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 11f", + "10:", + "cmp {rem:e}, 1", + "jb 11f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "11:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, +// tmp = out(reg) _, + buf = out(reg) _, + options(nostack), + ); + } + /* + unsafe { + core::arch::asm!( + "7:", + "cmp {rem:e}, 4", + "jb 8f", + "sub {rem:e}, 4", + "mov {buf:e}, dword ptr [{src} + {rem}]", + "mov dword ptr [{dest} + {rem}], {buf:e}", + "jmp 7b", + "8:", + "test {rem:e}, {rem:e}", + "jz 10f", + "sub {rem:e}, 1", + "mov {buf:l}, byte ptr [{src} + {rem}]", + "mov byte ptr [{dest} + {rem}], {buf:l}", + "jnz 8b", + "10:", + src = in(reg) src, + dest = in(reg) dest, + rem = in(reg) rem, +// tmp = out(reg) _, + buf = out(reg) _, + options(nostack), + ); + } + */ + /* + unsafe { + core::arch::asm!( + "mov {tmp}, {rem}", + "and {tmp}, 3", + "je 3f", + "sub {rem}, {tmp}", + "2:", + "mov {buf:l}, byte ptr [{src}]", + "mov byte ptr [{dest}], {buf:l}", + "add {src}, 1", + "add {dest}, 1", + "sub {tmp}, 1", + "jnz 2b", + "3:", + "test {rem}, {rem}", + "jz 5f", + "4:", + "sub {rem}, 4", + "mov {buf:e}, dword ptr [{src} + {rem}]", + "mov dword ptr [{dest} + {rem}], {buf:e}", + "jnz 4b", + "5:", + src = in(reg) src, + dest = in(reg) dest, + rem = in(reg) rem, + tmp = out(reg) _, + buf = out(reg) _, + ); + } + */ + /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); + } + } + */ + } + } + + let address: u64 = 
0; + let context = Some(&NoContext); + let colors = &NoColors; + if self.prefixes.rep_any() { + if self.xacquire() { + danger_anguished_string_write(out, "xacquire "); + } + if self.xrelease() { + danger_anguished_string_write(out, "xrelease "); + } + + if self.opcode.can_rep() { + if self.prefixes.rep() { + danger_anguished_string_write(out, "rep "); + } else if self.prefixes.repnz() { + danger_anguished_string_write(out, "repnz "); + } + } + } + + if self.prefixes.lock() { + danger_anguished_string_write(out, "lock "); + } + + use core::mem::MaybeUninit; + + danger_anguished_variable_length_string_write(out, self.opcode.name()); + + if self.operand_count > 0 { + danger_anguished_string_write(out, " "); + +// let x = Operand::from_spec(self, self.operands[0]); + + struct RelativeBranchPrinter<'a, Y: YaxColors> { + inst: &'a Instruction, + colors: &'a Y, + out: &'a mut alloc::string::String, + } + + impl<'a, Y: YaxColors> crate::long_mode::OperandVisitor for RelativeBranchPrinter<'a, Y> { + // return true if we printed a relative branch offset, false otherwise + type Ok = bool; + // but errors are errors + type Error = fmt::Error; + + fn visit_reg(&mut self, reg: RegSpec) -> Result { + Ok(false) + } + fn visit_deref(&mut self, reg: RegSpec) -> Result { + Ok(false) + } + fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { + Ok(false) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i8(&mut self, rel: i8) -> Result { + if RELATIVE_BRANCHES.contains(&self.inst.opcode) { + danger_anguished_string_write(self.out, "$"); + let mut v = rel as u8; + if rel < 0 { + danger_anguished_string_write(&mut self.out, "-"); + v = -rel as u8; + } else { + danger_anguished_string_write(&mut self.out, "+"); + } + danger_anguished_string_write(self.out, "0x"); + let mut buf = [MaybeUninit::::uninit(); 2]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + 
break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + + anguished_string_write(&mut self.out, s); + Ok(true) + } else { + Ok(false) + } + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i32(&mut self, rel: i32) -> Result { + if RELATIVE_BRANCHES.contains(&self.inst.opcode) || self.inst.opcode == Opcode::XBEGIN { + danger_anguished_string_write(self.out, "$"); + let mut v = rel as u32; + if rel < 0 { + danger_anguished_string_write(&mut self.out, "-"); + v = -rel as u32; + } else { + danger_anguished_string_write(&mut self.out, "+"); + } + danger_anguished_string_write(self.out, "0x"); + let mut buf = [MaybeUninit::::uninit(); 8]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + +// danger_anguished_string_write(&mut self.out, s); + + danger_anguished_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); + Ok(true) + } else { + Ok(false) + } + } + fn visit_u8(&mut self, imm: u8) -> Result { + Ok(false) + } + fn visit_i16(&mut self, imm: i16) -> Result { + Ok(false) + } + fn visit_u16(&mut self, imm: u16) -> Result { + Ok(false) + } + fn visit_u32(&mut self, imm: u32) -> Result { + Ok(false) + } + fn visit_i64(&mut self, imm: i64) -> Result { + Ok(false) + } + fn visit_u64(&mut self, imm: u64) -> Result { + Ok(false) + } + fn visit_abs_u32(&mut self, imm: u32) -> Result { + Ok(false) + } + fn visit_abs_u64(&mut self, imm: u64) -> Result { + Ok(false) + } + fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { + Ok(false) + } + fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { + Ok(false) + } + fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { + Ok(false) + 
} + fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { + Ok(false) + } + fn visit_other(&mut self) -> Result { + Ok(false) + } + fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { + Ok(false) + } + fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: crate::long_mode::SaeMode) -> Result { + Ok(false) + } + fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { + Ok(false) + } + fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + Ok(false) + } + } + + if self.visit_operand(0, &mut RelativeBranchPrinter { + inst: &self, + colors, + out, + })? 
{ + return Ok(()); + } + + fn display_op(inst: &Instruction, op_nr: u8, colors: &Y, out: &mut alloc::string::String) -> fmt::Result { + struct OperandPrinter<'a, Y: YaxColors> { + out: &'a mut alloc::string::String, + op_nr: u8, + colors: &'a Y, + inst: &'a Instruction, + } + + impl<'a, Y: YaxColors> crate::long_mode::OperandVisitor for OperandPrinter<'a, Y> { + type Ok = (); + type Error = fmt::Error; + + #[cfg_attr(feature="profiling", inline(never))] + fn visit_reg(&mut self, reg: RegSpec) -> Result { + let label = regspec_label(®); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); +// danger_anguished_variable_length_string_write(self.out, label); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_deref(&mut self, reg: RegSpec) -> Result { + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); +// self.out.write_str(" ")?; + + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } + if let Some(prefix) = self.inst.segment_override_for_op(self.op_nr) { + danger_anguished_string_write(self.out, " "); + danger_anguished_bstring_write(unsafe{self.out.as_mut_vec()}, prefix.name()); +// self.out.write_str(":")?; + danger_anguished_string_write(self.out, ":["); + } else { +// self.out.write_str("[")?; + danger_anguished_string_write(self.out, " ["); + } + let label = regspec_label(®); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); +// self.out.write_str("]") + danger_anguished_string_write(self.out, "]"); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); + + if self.op_nr >= 4 { + unsafe { 
core::hint::unreachable_unchecked(); } + } + if let Some(prefix) = self.inst.segment_override_for_op(self.op_nr) { + danger_anguished_string_write(self.out, " "); + danger_anguished_bstring_write(unsafe{self.out.as_mut_vec()}, prefix.name()); + danger_anguished_string_write(self.out, ":["); + } else { + danger_anguished_string_write(self.out, " ["); + } + let label = regspec_label(®); + if label.len() < 2 { + unsafe { core::hint::unreachable_unchecked(); } + } + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); + // write!(self.out, "{}", self.colors.number(signed_i32_hex(disp)))?; + let mut v = disp as u32; + if disp < 0 { + danger_anguished_string_write(self.out, " - 0x"); + v = -disp as u32; + } else { + danger_anguished_string_write(self.out, " + 0x"); + } + if v == 0 { + danger_anguished_string_write(self.out, "0"); + } else { + let lzcnt = v.leading_zeros(); + let mut digits = 8 - (lzcnt/8); + while digits > 0 { + let digit = (v >> (digits * 8)) & 0xf; + let c = c_to_hex(digit as u8); + danger_anguished_bstring_write(unsafe {self.out.as_mut_vec()}, &[c]); + digits -= 1; + } + } + /* + let mut buf = [MaybeUninit::::uninit(); 8]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + static CHARSET: &'static [u8; 16] = b"0123456789abcdef"; + let c = CHARSET[digit as usize]; + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + danger_anguished_string_write(&mut self.out, s); + */ + danger_anguished_string_write(&mut self.out, "]"); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i8(&mut self, imm: i8) -> Result { + let mut v = imm as u8; + if imm < 0 { + danger_anguished_string_write(&mut self.out, "-"); + v = -imm as u8; + } + danger_anguished_string_write(&mut self.out, "0x"); + let mut buf = [MaybeUninit::::uninit(); 2]; + let mut curr = 
buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s: &str = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + danger_anguished_string_write(&mut self.out, s); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_u8(&mut self, imm: u8) -> Result { + let mut v = imm as u8; + danger_anguished_string_write(&mut self.out, "0x"); + let mut buf = [MaybeUninit::::uninit(); 2]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s: &str = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + danger_anguished_string_write(&mut self.out, s); + Ok(()) + + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i16(&mut self, imm: i16) -> Result { + let mut v = imm as u16; + if imm < 0 { + danger_anguished_string_write(&mut self.out, "-"); + v = -imm as u16; + } + danger_anguished_string_write(&mut self.out, "0x"); + let mut buf = [MaybeUninit::::uninit(); 4]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s: &str = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + anguished_string_write(&mut self.out, s); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_u16(&mut self, imm: u16) -> Result { + let mut v = imm as u32; + danger_anguished_string_write(&mut self.out, "0x"); + let mut buf = [MaybeUninit::::uninit(); 4]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + 
core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + anguished_string_write(&mut self.out, s); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i32(&mut self, imm: i32) -> Result { + let mut v = imm as u32; + if imm < 0 { + danger_anguished_string_write(&mut self.out, "-"); + v = -imm as u32; + } + danger_anguished_string_write(&mut self.out, "0x"); + let mut buf = [MaybeUninit::::uninit(); 8]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + // danger_anguished_string_write(&mut self.out, s); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_u32(&mut self, imm: u32) -> Result { + let mut v = imm as u32; + danger_anguished_string_write(&mut self.out, "0x"); + let mut buf = [MaybeUninit::::uninit(); 8]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + //danger_anguished_string_write(&mut self.out, s); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i64(&mut self, imm: i64) -> Result { + let mut v = imm as u32; + if imm < 0 { + danger_anguished_string_write(&mut self.out, "-"); + v = -imm as u32; + } + danger_anguished_string_write(&mut self.out, "0x"); + let mut buf = [MaybeUninit::::uninit(); 16]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 
16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + danger_anguished_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); + Ok(()) + + + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_u64(&mut self, imm: u64) -> Result { + let mut v = imm as u64; + danger_anguished_string_write(&mut self.out, "0x"); + let mut buf = [MaybeUninit::::uninit(); 16]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + danger_anguished_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_abs_u32(&mut self, imm: u32) -> Result { + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); + danger_anguished_string_write(self.out, " [0x"); + let mut v = imm as u32; + let mut buf = [MaybeUninit::::uninit(); 16]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + // anguished_string_write(&mut self.out, s); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); + danger_anguished_string_write(self.out, "]"); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_abs_u64(&mut self, imm: u64) -> Result { + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); + danger_anguished_string_write(self.out, " 
[0x"); + let mut v = imm as u64; + let mut buf = [MaybeUninit::::uninit(); 16]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + // anguished_string_write(&mut self.out, s); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); + danger_anguished_string_write(self.out, "]"); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); + danger_anguished_string_write(self.out, " "); + + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } + if let Some(prefix) = self.inst.segment_override_for_op(self.op_nr) { + danger_anguished_bstring_write(unsafe{self.out.as_mut_vec()}, prefix.name()); + danger_anguished_string_write(self.out, ":"); + } + danger_anguished_string_write(self.out, "["); + let label = regspec_label(®); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); + danger_anguished_string_write(self.out, " * "); + danger_anguished_bstring_write(unsafe { self.out.as_mut_vec() }, &[scale + b'0']); + danger_anguished_string_write(self.out, "]"); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); + danger_anguished_string_write(self.out, " "); + + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } + if let Some(prefix) = 
self.inst.segment_override_for_op(self.op_nr) { + danger_anguished_bstring_write(unsafe{self.out.as_mut_vec()}, prefix.name()); + danger_anguished_string_write(self.out, ":"); + } + danger_anguished_string_write(self.out, "["); + let label = regspec_label(&base); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); + danger_anguished_string_write(self.out, " + "); + let label = regspec_label(&index); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); + danger_anguished_string_write(self.out, " * "); + danger_anguished_bstring_write(unsafe { self.out.as_mut_vec() }, &[scale + b'0']); + danger_anguished_string_write(self.out, "]"); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); + danger_anguished_string_write(self.out, " "); + + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } + if let Some(prefix) = self.inst.segment_override_for_op(self.op_nr) { + danger_anguished_bstring_write(unsafe{self.out.as_mut_vec()}, prefix.name()); + danger_anguished_string_write(self.out, ":"); + } + danger_anguished_string_write(self.out, "["); + let label = regspec_label(®); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); + danger_anguished_string_write(self.out, " * "); + danger_anguished_bstring_write(unsafe { self.out.as_mut_vec() }, &[scale + b'0']); + let mut v = disp as u32; + if disp < 0 { + danger_anguished_string_write(self.out, " - "); + v = -disp as u32; + } else { + danger_anguished_string_write(self.out, " + "); + } + danger_anguished_string_write(self.out, "0x"); + let mut buf = [MaybeUninit::::uninit(); 8]; + let mut curr = buf.len(); + 
loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + // anguished_string_write(&mut self.out, s); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); + danger_anguished_string_write(self.out, "]"); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); + danger_anguished_string_write(self.out, " "); + + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } + if let Some(prefix) = self.inst.segment_override_for_op(self.op_nr) { + danger_anguished_bstring_write(unsafe{self.out.as_mut_vec()}, prefix.name()); + danger_anguished_string_write(self.out, ":"); + } + danger_anguished_string_write(self.out, "["); + let label = regspec_label(&base); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); + danger_anguished_string_write(self.out, " + "); + let label = regspec_label(&index); + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); + danger_anguished_string_write(self.out, " * "); + danger_anguished_bstring_write(unsafe { self.out.as_mut_vec() }, &[scale + b'0']); + let mut v = disp as u32; + if disp < 0 { + danger_anguished_string_write(self.out, " - "); + v = -disp as u32; + } else { + danger_anguished_string_write(self.out, " + "); + } + danger_anguished_string_write(self.out, "0x"); + let mut buf = [MaybeUninit::::uninit(); 8]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + 
buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); + danger_anguished_string_write(self.out, "]"); + Ok(()) + } + fn visit_other(&mut self) -> Result { + Ok(()) + } + fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { + Ok(()) + } + fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: crate::long_mode::SaeMode) -> Result { + Ok(()) + } + fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { + Ok(()) + } + fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + Ok(()) + } + fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { + Ok(()) + } + fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + Ok(()) + } + fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + Ok(()) + } + fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { + Ok(()) + } + fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + Ok(()) + } + fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + Ok(()) + } + fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + Ok(()) + } + } + + let mut printer = OperandPrinter { + out, + inst, + op_nr, + colors, + }; + inst.visit_operand(op_nr, &mut printer) + } + + display_op(self, 0, colors, out)?; + + for i in 1..self.operand_count { + match self.opcode { + _ => { + match 
&self.operands[i as usize] { + &OperandSpec::Nothing => { + // should never see a Nothing if we iterate only through + // `operand_count`.. + unsafe { crate::long_mode::unreachable_unchecked() } + }, + _ => { + danger_anguished_string_write(out, ", "); + display_op(self, i, colors, out)?; + if let Some(evex) = self.prefixes.evex() { + if evex.broadcast() && false { // x.is_memory() { + let scale = if self.opcode == Opcode::VCVTPD2PS || self.opcode == Opcode::VCVTTPD2UDQ || self.opcode == Opcode::VCVTPD2UDQ || self.opcode == Opcode::VCVTUDQ2PD || self.opcode == Opcode::VCVTPS2PD || self.opcode == Opcode::VCVTQQ2PS || self.opcode == Opcode::VCVTDQ2PD || self.opcode == Opcode::VCVTTPD2DQ || self.opcode == Opcode::VFPCLASSPS || self.opcode == Opcode::VFPCLASSPD || self.opcode == Opcode::VCVTNEPS2BF16 || self.opcode == Opcode::VCVTUQQ2PS || self.opcode == Opcode::VCVTPD2DQ || self.opcode == Opcode::VCVTTPS2UQQ || self.opcode == Opcode::VCVTPS2UQQ || self.opcode == Opcode::VCVTTPS2QQ || self.opcode == Opcode::VCVTPS2QQ { + if self.opcode == Opcode::VFPCLASSPS || self.opcode == Opcode::VCVTNEPS2BF16 { + if evex.vex().l() { + 8 + } else if evex.lp() { + 16 + } else { + 4 + } + } else if self.opcode == Opcode::VFPCLASSPD { + if evex.vex().l() { + 4 + } else if evex.lp() { + 8 + } else { + 2 + } + } else { + // vcvtpd2ps is "cool": in broadcast mode, it can read a + // double-precision float (qword), resize to single-precision, + // then broadcast that to the whole destination register. this + // means we need to show `xmm, qword [addr]{1to4}` if vector + // size is 256. likewise, scale of 8 for the same truncation + // reason if vector size is 512. + // vcvtudq2pd is the same story. + // vfpclassp{s,d} is a mystery to me. + if evex.vex().l() { + 4 + } else if evex.lp() { + 8 + } else { + 2 + } + } + } else { + // this should never be `None` - that would imply two + // memory operands for a broadcasted operation. 
+ if let Some(width) = Operand::from_spec(self, self.operands[i as usize - 1]).width() { + width / self.mem_size + } else { + 0 + } + }; + write!(out, "{{1to{}}}", scale)?; + } + } + } + } + } + } + } + } + Ok(()) + } + pub fn write_to(&self, out: &mut T) -> fmt::Result { self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) } diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index b725287..21b92e6 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -62,6 +62,19 @@ fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str text, expected ); + /* + let mut text2 = String::new(); + instr.write_2(&mut text2); + assert!( + text2 == text, + "display error for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", + hex, + instr, + decoder, + text2, + text, + ); + */ } else { eprintln!("non-fmt build cannot compare text equality") } From 2df5d55b4d87cec9ea618def24cee63997ba8c4c Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 16 Jun 2024 12:36:23 -0700 Subject: [PATCH 10/95] move to shared (safe) impl of RelativeBranchPrinter --- src/long_mode/display.rs | 169 --------------------------------------- 1 file changed, 169 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index ba20c1c..6d9969d 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -4140,175 +4140,6 @@ impl Instruction { if self.operand_count > 0 { danger_anguished_string_write(out, " "); -// let x = Operand::from_spec(self, self.operands[0]); - - struct RelativeBranchPrinter<'a, Y: YaxColors> { - inst: &'a Instruction, - colors: &'a Y, - out: &'a mut alloc::string::String, - } - - impl<'a, Y: YaxColors> crate::long_mode::OperandVisitor for RelativeBranchPrinter<'a, Y> { - // return true if we printed a relative branch offset, false otherwise - type Ok = bool; - // but errors are errors - type Error = fmt::Error; - - fn visit_reg(&mut self, reg: RegSpec) -> Result { 
- Ok(false) - } - fn visit_deref(&mut self, reg: RegSpec) -> Result { - Ok(false) - } - fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { - Ok(false) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_i8(&mut self, rel: i8) -> Result { - if RELATIVE_BRANCHES.contains(&self.inst.opcode) { - danger_anguished_string_write(self.out, "$"); - let mut v = rel as u8; - if rel < 0 { - danger_anguished_string_write(&mut self.out, "-"); - v = -rel as u8; - } else { - danger_anguished_string_write(&mut self.out, "+"); - } - danger_anguished_string_write(self.out, "0x"); - let mut buf = [MaybeUninit::::uninit(); 2]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - - anguished_string_write(&mut self.out, s); - Ok(true) - } else { - Ok(false) - } - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_i32(&mut self, rel: i32) -> Result { - if RELATIVE_BRANCHES.contains(&self.inst.opcode) || self.inst.opcode == Opcode::XBEGIN { - danger_anguished_string_write(self.out, "$"); - let mut v = rel as u32; - if rel < 0 { - danger_anguished_string_write(&mut self.out, "-"); - v = -rel as u32; - } else { - danger_anguished_string_write(&mut self.out, "+"); - } - danger_anguished_string_write(self.out, "0x"); - let mut buf = [MaybeUninit::::uninit(); 8]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - -// danger_anguished_string_write(&mut self.out, s); - - danger_anguished_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); - Ok(true) - } else { - Ok(false) - } - } - fn 
visit_u8(&mut self, imm: u8) -> Result { - Ok(false) - } - fn visit_i16(&mut self, imm: i16) -> Result { - Ok(false) - } - fn visit_u16(&mut self, imm: u16) -> Result { - Ok(false) - } - fn visit_u32(&mut self, imm: u32) -> Result { - Ok(false) - } - fn visit_i64(&mut self, imm: i64) -> Result { - Ok(false) - } - fn visit_u64(&mut self, imm: u64) -> Result { - Ok(false) - } - fn visit_abs_u32(&mut self, imm: u32) -> Result { - Ok(false) - } - fn visit_abs_u64(&mut self, imm: u64) -> Result { - Ok(false) - } - fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { - Ok(false) - } - fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { - Ok(false) - } - fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { - Ok(false) - } - fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { - Ok(false) - } - fn visit_other(&mut self) -> Result { - Ok(false) - } - fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { - Ok(false) - } - fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: crate::long_mode::SaeMode) -> Result { - Ok(false) - } - fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { - Ok(false) - } - fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { - Ok(false) - } - fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { - Ok(false) - } - fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { - Ok(false) - } - fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { - Ok(false) - } - fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { - Ok(false) - } - fn 
visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { - Ok(false) - } - fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { - Ok(false) - } - fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { - Ok(false) - } - } - if self.visit_operand(0, &mut RelativeBranchPrinter { inst: &self, colors, From 6f03facaedf214b7ab84f077027df2c0e2742de5 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 16 Jun 2024 12:55:55 -0700 Subject: [PATCH 11/95] remove branch better handled elsewhere --- src/long_mode/display.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 6d9969d..4725154 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -4792,14 +4792,6 @@ fn contextualize_intel(instr: &Instruction, colors: out.write_str(instr.opcode.name())?; - if instr.opcode == Opcode::XBEGIN { - if (instr.imm as i32) >= 0 { - return write!(out, " $+{}", colors.number(signed_i32_hex(instr.imm as i32))); - } else { - return write!(out, " ${}", colors.number(signed_i32_hex(instr.imm as i32))); - } - } - if instr.operand_count > 0 { out.write_str(" ")?; From 7ab69f66b983eba11e9864234585265f334caac0 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 16 Jun 2024 13:03:28 -0700 Subject: [PATCH 12/95] use less of core::fmt, write by hand `name()` returning a `[u8; 2]` is nice when there is a specializing and unrolling write implementation, whereas `&str` might not consistently unroll into a simple 2-byte copy (rather than loop). it'll look a little more reasonable soon, hopefully.. 
--- src/long_mode/display.rs | 41 +++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 4725154..3ccbb0f 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -446,7 +446,10 @@ impl crate::long_mode::OperandVisitor for Colorizi self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; self.f.write_str(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - write!(self.f, "{}:", prefix)?; + let name = prefix.name(); + self.f.write_char(name[0] as char)?; + self.f.write_char(name[1] as char)?; + self.f.write_char(':')?; } write!(self.f, "[{}]", self.colors.address(u32_hex(imm))) } @@ -454,7 +457,10 @@ impl crate::long_mode::OperandVisitor for Colorizi self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; self.f.write_str(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - write!(self.f, "{}:", prefix)?; + let name = prefix.name(); + self.f.write_char(name[0] as char)?; + self.f.write_char(name[1] as char)?; + self.f.write_char(':')?; } write!(self.f, "[{}]", self.colors.address(u64_hex(imm))) } @@ -462,8 +468,10 @@ impl crate::long_mode::OperandVisitor for Colorizi self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; self.f.write_str(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - write!(self.f, "{}", prefix)?; - self.f.write_str(":")?; + let name = prefix.name(); + self.f.write_char(name[0] as char)?; + self.f.write_char(name[1] as char)?; + self.f.write_char(':')?; } self.f.write_str("[")?; self.f.write_str(regspec_label(®))?; @@ -475,7 +483,10 @@ impl crate::long_mode::OperandVisitor for Colorizi self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; self.f.write_str(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - write!(self.f, "{}:", prefix)?; + let name = prefix.name(); + 
self.f.write_char(name[0] as char)?; + self.f.write_char(name[1] as char)?; + self.f.write_char(':')?; } self.f.write_str("[")?; self.f.write_str(regspec_label(®))?; @@ -485,7 +496,10 @@ impl crate::long_mode::OperandVisitor for Colorizi self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; self.f.write_str(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - write!(self.f, "{}:", prefix)?; + let name = prefix.name(); + self.f.write_char(name[0] as char)?; + self.f.write_char(name[1] as char)?; + self.f.write_char(':')?; } write!(self.f, "[{} * {}]", regspec_label(®), @@ -496,7 +510,10 @@ impl crate::long_mode::OperandVisitor for Colorizi self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; self.f.write_str(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - write!(self.f, "{}:", prefix)?; + let name = prefix.name(); + self.f.write_char(name[0] as char)?; + self.f.write_char(name[1] as char)?; + self.f.write_char(':')?; } write!(self.f, "[{} * {} ", regspec_label(®), @@ -509,7 +526,10 @@ impl crate::long_mode::OperandVisitor for Colorizi self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; self.f.write_str(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - write!(self.f, "{}:", prefix)?; + let name = prefix.name(); + self.f.write_char(name[0] as char)?; + self.f.write_char(name[1] as char)?; + self.f.write_char(':')?; } write!(self.f, "[{} + {} * {}]", regspec_label(&base), @@ -521,7 +541,10 @@ impl crate::long_mode::OperandVisitor for Colorizi self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; self.f.write_str(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - write!(self.f, "{}:", prefix)?; + let name = prefix.name(); + self.f.write_char(name[0] as char)?; + self.f.write_char(name[1] as char)?; + self.f.write_char(':')?; } write!(self.f, "[{} + {} * {} ", regspec_label(&base), From 
0399548ac215717e23d2ad76cb1417892b477c74 Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 17 Jun 2024 01:35:52 -0700 Subject: [PATCH 13/95] might be an ok way to redesign colorization.... it turns out that yaxpeax-arch's notion of colorization has been broken from the start for systems that do markup without inline sequences (e.g. windows/cmd.exe before vt100 support) --- src/long_mode/display.rs | 74 +++++++++++++++++++++++++++++++++------- 1 file changed, 61 insertions(+), 13 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 3ccbb0f..b67acd4 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -362,6 +362,34 @@ impl Colorize for Operand { } } +enum TokenType { + Mnemonic, + Operand, + Immediate, + Register, + Offset, +} + +trait DisplaySink: fmt::Write { +// fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error>; +// fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error>; + fn span_enter(&mut self, ty: TokenType); + fn span_end(&mut self, ty: TokenType); +} + +impl DisplaySink for T { + /* + fn write_str(&mut self) -> Result<(), core::fmt::Error> { + ::write_str(self, s) + } + fn write_char(&mut self) -> Result<(), core::fmt::Error> { + ::write_char(self, c) + } + */ + fn span_enter(&mut self, _ty: TokenType) { } + fn span_end(&mut self, _ty: TokenType) { } +} + struct ColorizingOperandVisitor<'a, T, Y> { instr: &'a Instruction, op_nr: u8, @@ -369,37 +397,57 @@ struct ColorizingOperandVisitor<'a, T, Y> { f: &'a mut T, } -impl crate::long_mode::OperandVisitor for ColorizingOperandVisitor<'_, T, Y> { +impl crate::long_mode::OperandVisitor for ColorizingOperandVisitor<'_, T, Y> { type Ok = (); type Error = core::fmt::Error; fn visit_u8(&mut self, imm: u8) -> Result { - write!(self.f, "{}", self.colors.number(u8_hex(imm))) + self.f.span_enter(TokenType::Immediate); + write!(self.f, "{}", u8_hex(imm))?; + self.f.span_end(TokenType::Immediate); + Ok(()) } fn visit_i8(&mut self, imm: i8) -> 
Result { - write!(self.f, "{}", - self.colors.number(signed_i8_hex(imm))) + self.f.span_enter(TokenType::Immediate); + write!(self.f, "{}", signed_i8_hex(imm))?; + self.f.span_end(TokenType::Immediate); + Ok(()) } fn visit_u16(&mut self, imm: u16) -> Result { - write!(self.f, "{}", self.colors.number(u16_hex(imm))) + self.f.span_enter(TokenType::Immediate); + write!(self.f, "{}", u16_hex(imm))?; + self.f.span_end(TokenType::Immediate); + Ok(()) } fn visit_i16(&mut self, imm: i16) -> Result { - write!(self.f, "{}", - self.colors.number(signed_i16_hex(imm))) + self.f.span_enter(TokenType::Immediate); + write!(self.f, "{}", signed_i16_hex(imm))?; + self.f.span_end(TokenType::Immediate); + Ok(()) } fn visit_u32(&mut self, imm: u32) -> Result { - write!(self.f, "{}", self.colors.number(u32_hex(imm))) + self.f.span_enter(TokenType::Immediate); + write!(self.f, "{}", u32_hex(imm))?; + self.f.span_end(TokenType::Immediate); + Ok(()) } fn visit_i32(&mut self, imm: i32) -> Result { - write!(self.f, "{}", - self.colors.number(signed_i32_hex(imm))) + self.f.span_enter(TokenType::Immediate); + write!(self.f, "{}", signed_i32_hex(imm))?; + self.f.span_end(TokenType::Immediate); + Ok(()) } fn visit_u64(&mut self, imm: u64) -> Result { - write!(self.f, "{}", self.colors.number(u64_hex(imm))) + self.f.span_enter(TokenType::Immediate); + write!(self.f, "{}", u64_hex(imm))?; + self.f.span_end(TokenType::Immediate); + Ok(()) } fn visit_i64(&mut self, imm: i64) -> Result { - write!(self.f, "{}", - self.colors.number(signed_i64_hex(imm))) + self.f.span_enter(TokenType::Immediate); + write!(self.f, "{}", signed_i64_hex(imm))?; + self.f.span_end(TokenType::Immediate); + Ok(()) } fn visit_reg(&mut self, reg: RegSpec) -> Result { self.f.write_str(regspec_label(®)) From 1f18a960a5826ce38e2f758b9160df260d120cfc Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 17 Jun 2024 01:44:12 -0700 Subject: [PATCH 14/95] add token spans for some registers --- src/long_mode/display.rs | 9 ++++++++- 1 file 
changed, 8 insertions(+), 1 deletion(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index b67acd4..0d11e33 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -450,13 +450,20 @@ impl crate::long_mode::OperandVisitor for Coloriz Ok(()) } fn visit_reg(&mut self, reg: RegSpec) -> Result { - self.f.write_str(regspec_label(&reg)) + self.f.span_enter(TokenType::Register); + self.f.write_str(regspec_label(&reg))?; + self.f.span_end(TokenType::Register); + Ok(()) } fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { + self.f.span_enter(TokenType::Register); self.f.write_str(regspec_label(&spec))?; + self.f.span_end(TokenType::Register); if mask.num != 0 { self.f.write_str("{")?; + self.f.span_enter(TokenType::Register); self.f.write_str(regspec_label(&mask))?; + self.f.span_end(TokenType::Register); self.f.write_str("}")?; } if let MergeMode::Zero = merge_mode { From 0e99d946eee3398d5629d6f29f8bf7387643795a Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 11:10:59 -0700 Subject: [PATCH 15/95] enough infrastructure to avoid bounds checks, at incredible user cost --- src/long_mode/display.rs | 415 ++++++++++++++++++++++++++++++--------- src/long_mode/mod.rs | 2 + test/long_mode/mod.rs | 12 +- 3 files changed, 327 insertions(+), 102 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 0d11e33..6f4320b 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -362,7 +362,7 @@ impl Colorize for Operand { } } -enum TokenType { +pub enum TokenType { Mnemonic, Operand, Immediate, @@ -370,14 +370,44 @@ enum TokenType { Offset, } -trait DisplaySink: fmt::Write { -// fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error>; -// fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error>; +pub trait DisplaySink: fmt::Write { + // /// may be optimized for writing strings of variable length.
+ // fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error>; + fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { + for c in s.as_bytes().iter() { + self.write_char(*c as char)?; + } + Ok(()) + } + // fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error>; fn span_enter(&mut self, ty: TokenType); fn span_end(&mut self, ty: TokenType); } +pub struct NoColorsSink<'a, T: fmt::Write> { + pub out: &'a mut T, +} + +impl<'a, T: fmt::Write> DisplaySink for NoColorsSink<'a, T> { + fn span_enter(&mut self, _ty: TokenType) { } + fn span_end(&mut self, _ty: TokenType) { } +} + +impl<'a, T: fmt::Write> fmt::Write for NoColorsSink<'a, T> { + fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.out.write_str(s) + } + fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { + self.out.write_char(c) + } + fn write_fmt(&mut self, f: fmt::Arguments) -> Result<(), core::fmt::Error> { + self.out.write_fmt(f) + } +} + +/* impl DisplaySink for T { + /* fn write_str(&mut self) -> Result<(), core::fmt::Error> { ::write_str(self, s) @@ -389,6 +419,227 @@ impl DisplaySink for T { fn span_enter(&mut self, _ty: TokenType) { } fn span_end(&mut self, _ty: TokenType) { } } +*/ + +pub struct BigEnoughString { + content: alloc::string::String, +} + +// TODO: move this to an impl on a handle from BigEnoughString obtained through an `unsafe fn` that +// clearly states requirements +impl fmt::Write for BigEnoughString { + fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { + // SAFETY: todo + let buf = unsafe { self.content.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + // should get DCE + if new_bytes.len() >= 32 { + unsafe { core::hint::unreachable_unchecked() } + } + // should get DCE + if new_bytes.len() == 0 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let mut rem = new_bytes.len() as 
isize; + unsafe { + buf.set_len(buf.len() + new_bytes.len()); + } + /* + while rem % 4 > 0 { + dest.offset(rem - 1).write_unaligned(src.offset(rem - 1).read_unaligned()); + rem -= 1; + } + + while rem > 0 { + (dest.offset(rem - 4) as *mut u32).write_unaligned(unsafe { + *core::mem::transmute::<&u8, &u32>(&new_bytes[rem as usize - 4]) + }); + rem -= 4; + } + */ + unsafe { + /* + if rem >= 8 { + rem -= 8; + (dest.offset(rem) as *mut u64).write_unaligned((src.offset(rem) as *const u64).read_unaligned()) + } + if rem >= 4 { + rem -= 4; + (dest.offset(rem) as *mut u32).write_unaligned((src.offset(rem) as *const u32).read_unaligned()); + if rem == 0 { + return; + } + } + if rem >= 2 { + rem -= 2; + (dest.offset(rem) as *mut u16).write_unaligned((src.offset(rem) as *const u16).read_unaligned()); + if rem == 0 { + return; + } + } + if rem >= 1 { + rem -= 1; + (dest.offset(rem) as *mut u8).write_unaligned((src.offset(rem) as *const u8).read_unaligned()) + } + */ + core::arch::asm!( + "6:", + "cmp {rem:e}, 16", + "jb 7f", + "mov {buf:r}, qword ptr [{src} + {rem} - 16]", + "mov qword ptr [{dest} + {rem} - 16], {buf:r}", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 16", + "jz 11f", + "7:", + "cmp {rem:e}, 8", + "jb 8f", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 8", + "jz 11f", + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 11f", + "9:", + "cmp {rem:e}, 2", + "jb 10f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 11f", + "10:", + "cmp {rem:e}, 1", + "jb 11f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "11:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, +// tmp = out(reg) _, 
+ buf = out(reg) _, + options(nostack), + ); + } + /* + unsafe { + core::arch::asm!( + "7:", + "cmp {rem:e}, 4", + "jb 8f", + "sub {rem:e}, 4", + "mov {buf:e}, dword ptr [{src} + {rem}]", + "mov dword ptr [{dest} + {rem}], {buf:e}", + "jmp 7b", + "8:", + "test {rem:e}, {rem:e}", + "jz 10f", + "sub {rem:e}, 1", + "mov {buf:l}, byte ptr [{src} + {rem}]", + "mov byte ptr [{dest} + {rem}], {buf:l}", + "jnz 8b", + "10:", + src = in(reg) src, + dest = in(reg) dest, + rem = in(reg) rem, +// tmp = out(reg) _, + buf = out(reg) _, + options(nostack), + ); + } + */ + /* + unsafe { + core::arch::asm!( + "mov {tmp}, {rem}", + "and {tmp}, 3", + "je 3f", + "sub {rem}, {tmp}", + "2:", + "mov {buf:l}, byte ptr [{src}]", + "mov byte ptr [{dest}], {buf:l}", + "add {src}, 1", + "add {dest}, 1", + "sub {tmp}, 1", + "jnz 2b", + "3:", + "test {rem}, {rem}", + "jz 5f", + "4:", + "sub {rem}, 4", + "mov {buf:e}, dword ptr [{src} + {rem}]", + "mov dword ptr [{dest} + {rem}], {buf:e}", + "jnz 4b", + "5:", + src = in(reg) src, + dest = in(reg) dest, + rem = in(reg) rem, + tmp = out(reg) _, + buf = out(reg) _, + ); + } + */ + /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); + } + } + */ + } + + Ok(()) + } + fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { + // SAFETY: TODO: goodness, what + unsafe { + let underlying = self.content.as_mut_vec(); + underlying.as_mut_ptr().offset(underlying.len() as isize).write(c as u8); + underlying.set_len(underlying.len() + 1); + } + Ok(()) + } +} + +impl DisplaySink for BigEnoughString { + fn span_enter(&mut self, ty: TokenType) {} + fn span_end(&mut self, ty: TokenType) {} +} + +impl BigEnoughString { + pub fn into_inner(self) -> alloc::string::String { + self.content + } + + pub fn from_string(mut s: alloc::string::String) -> Self { + s.reserve(256); + // safety: the string is large enough + unsafe { Self::from_string_unchecked(s) } + } + + pub fn 
new() -> Self { + Self::from_string(alloc::string::String::new()) + } + + /// safety: CALLER MUST ENSURE S IS LARGE ENOUGH TO HOLD ANY DISASSEMBLED x86 INSTRUCTION + unsafe fn from_string_unchecked(s: alloc::string::String) -> Self { + Self { + content: s + } + } +} struct ColorizingOperandVisitor<'a, T, Y> { instr: &'a Instruction, @@ -460,26 +711,26 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_str(regspec_label(&spec))?; self.f.span_end(TokenType::Register); if mask.num != 0 { - self.f.write_str("{")?; + self.f.write_fixed_size("{")?; self.f.span_enter(TokenType::Register); self.f.write_str(regspec_label(&mask))?; self.f.span_end(TokenType::Register); - self.f.write_str("}")?; + self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { - self.f.write_str("{z}")?; + self.f.write_fixed_size("{z}")?; } Ok(()) } fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: crate::long_mode::SaeMode) -> Result { self.f.write_str(regspec_label(&spec))?; if mask.num != 0 { - self.f.write_str("{")?; + self.f.write_fixed_size("{")?; self.f.write_str(regspec_label(&mask))?; - self.f.write_str("}")?; + self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { - self.f.write_str("{z}")?; + self.f.write_fixed_size("{z}")?; } self.f.write_str(sae_mode.label())?; Ok(()) @@ -487,19 +738,19 @@ impl crate::long_mode::OperandVisitor for Coloriz fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { self.f.write_str(regspec_label(&spec))?; if mask.num != 0 { - self.f.write_str("{")?; + self.f.write_fixed_size("{")?; self.f.write_str(regspec_label(&mask))?; - self.f.write_str("}")?; + self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { - self.f.write_str("{z}")?; + self.f.write_fixed_size("{z}")?; } - self.f.write_str("{sae}")?; + self.f.write_fixed_size("{sae}")?; Ok(()) } fn visit_abs_u32(&mut self, imm: u32) -> Result { 
self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -510,7 +761,7 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_abs_u64(&mut self, imm: u64) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -521,35 +772,35 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; self.f.write_char(name[1] as char)?; self.f.write_char(':')?; } - self.f.write_str("[")?; + self.f.write_fixed_size("[")?; self.f.write_str(regspec_label(&reg))?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - self.f.write_str("]") + self.f.write_fixed_size("]") } fn visit_deref(&mut self, reg: RegSpec) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; self.f.write_char(name[1] as char)?; self.f.write_char(':')?; } - self.f.write_str("[")?; + self.f.write_fixed_size("[")?; self.f.write_str(regspec_label(&reg))?; - self.f.write_str("]") + self.f.write_fixed_size("]") } fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result {
self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -563,7 +814,7 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -579,7 +830,7 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -594,7 +845,7 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -611,7 +862,7 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; write!(self.f, "[{} ", regspec_label(&spec))?; format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; write!(self.f, "]")?; @@ -619,15 
+870,15 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; - self.f.write_str("[")?; + self.f.write_fixed_size(" ")?; + self.f.write_fixed_size("[")?; self.f.write_str(regspec_label(&spec))?; - self.f.write_str("]")?; + self.f.write_fixed_size("]")?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; write!(self.f, "[{} * {}]", regspec_label(&spec), self.colors.number(scale) @@ -636,7 +887,7 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; write!(self.f, "[{} * {} ", regspec_label(&spec), self.colors.number(scale), @@ -647,17 +898,17 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; - self.f.write_str("[")?; + self.f.write_fixed_size(" ")?; + self.f.write_fixed_size("[")?; self.f.write_str(regspec_label(&base))?; - self.f.write_str(" + ")?; + self.f.write_fixed_size(" + ")?; self.f.write_str(regspec_label(&index))?; - self.f.write_str("]")?; + self.f.write_fixed_size("]")?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + 
self.f.write_fixed_size(" ")?; write!(self.f, "[{} + {} ", regspec_label(&base), regspec_label(&index), @@ -668,7 +919,7 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; write!(self.f, "[{} + {} * {}]", regspec_label(&base), regspec_label(&index), @@ -678,7 +929,7 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; - self.f.write_str(" ")?; + self.f.write_fixed_size(" ")?; write!(self.f, "[{} + {} * {} ", regspec_label(&base), regspec_label(&index), @@ -3688,52 +3939,6 @@ struct NoContext; extern crate alloc; -trait Writable { - unsafe fn as_mut_vec(&mut self) -> &mut alloc::vec::Vec; - fn into_inner(self) -> T; -} - -impl Writable for alloc::string::String { - unsafe fn as_mut_vec(&mut self) -> &mut alloc::vec::Vec { - self.as_mut_vec() - } - fn into_inner(self) -> alloc::string::String { - self - } -} - -struct BigEnoughString { - content: alloc::string::String, -} - -impl Writable for BigEnoughString { - unsafe fn as_mut_vec(&mut self) -> &mut alloc::vec::Vec { - self.content.as_mut_vec() - } - fn into_inner(self) -> alloc::string::String { - self.content - } -} - -impl BigEnoughString { - pub fn from_string(mut s: alloc::string::String) -> Self { - s.reserve(256); - // safety: the string is large enough - unsafe { Self::from_string_unchecked(s) } - } - - pub fn new() -> Self { - Self::from_string(alloc::string::String::new()) - } - - /// safety: CALLER MUST ENSURE S IS LARGE ENOUGH TO HOLD ANY DISASSEMBLED x86 INSTRUCTION - unsafe fn from_string_unchecked(s: alloc::string::String) -> Self { - Self { - content: s - } - 
} -} - // TODO: find a better place to put this.... fn c_to_hex(c: u8) -> u8 { /* @@ -4187,6 +4392,7 @@ impl Instruction { } } + /* let address: u64 = 0; let context = Some(&NoContext); let colors = &NoColors; @@ -4839,31 +5045,38 @@ impl Instruction { } } } + */ Ok(()) } - pub fn write_to(&self, out: &mut T) -> fmt::Result { + pub fn write_to(&self, out: &mut T) -> fmt::Result { self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) } } fn contextualize_intel(instr: &Instruction, colors: &Y, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { + let mut out = NoColorsSink { + out, + }; + let mut out = &mut out; + use core::fmt::Write; + if instr.xacquire() { - out.write_str("xacquire ")?; + out.write_fixed_size("xacquire ")?; } if instr.xrelease() { - out.write_str("xrelease ")?; + out.write_fixed_size("xrelease ")?; } if instr.prefixes.lock() { - out.write_str("lock ")?; + out.write_fixed_size("lock ")?; } if instr.prefixes.rep_any() { if instr.opcode.can_rep() { if instr.prefixes.rep() { - out.write_str("rep ")?; + out.write_fixed_size("rep ")?; } else if instr.prefixes.repnz() { - out.write_str("repnz ")?; + out.write_fixed_size("repnz ")?; } } } @@ -4871,7 +5084,7 @@ fn contextualize_intel(instr: &Instruction, colors: out.write_str(instr.opcode.name())?; if instr.operand_count > 0 { - out.write_str(" ")?; + out.write_fixed_size(" ")?; if instr.visit_operand(0, &mut RelativeBranchPrinter { inst: instr, @@ -4892,7 +5105,7 @@ fn contextualize_intel(instr: &Instruction, colors: for i in 1..instr.operand_count { // don't worry about checking for `instr.operands[i] != Nothing`, it would be a bug to // reach that while iterating only to `operand_count`.. 
- out.write_str(", ")?; + out.write_fixed_size(", ")?; let mut displayer = ColorizingOperandVisitor { instr, op_nr: i, @@ -5309,6 +5522,12 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual #[cfg(feature="std")] impl ShowContextual], T, Y> for Instruction { fn contextualize(&self, colors: &Y, _address: u64, context: Option<&[Option]>, out: &mut T) -> fmt::Result { + let mut out = NoColorsSink { + out, + }; + let mut out = &mut out; + use core::fmt::Write; + if self.prefixes.lock() { write!(out, "lock ")?; } @@ -5390,13 +5609,13 @@ static RELATIVE_BRANCHES: [Opcode; 21] = [ Opcode::JLE, Opcode::JG, ]; -struct RelativeBranchPrinter<'a, Y: YaxColors, F: fmt::Write> { +struct RelativeBranchPrinter<'a, Y: YaxColors, F: DisplaySink> { inst: &'a Instruction, colors: &'a Y, out: &'a mut F, } -impl<'a, Y: YaxColors, F: fmt::Write> crate::long_mode::OperandVisitor for RelativeBranchPrinter<'a, Y, F> { +impl<'a, Y: YaxColors, F: DisplaySink> crate::long_mode::OperandVisitor for RelativeBranchPrinter<'a, Y, F> { // return true if we printed a relative branch offset, false otherwise type Ok = bool; // but errors are errors @@ -5425,7 +5644,7 @@ impl<'a, Y: YaxColors, F: fmt::Write> crate::long_mode::OperandVisitor for Relat self.out.write_char('+')?; // danger_anguished_string_write(&mut self.out, "+"); } - self.out.write_str("0x")?; + self.out.write_fixed_size("0x")?; // danger_anguished_string_write(self.out, "0x"); let mut buf = [core::mem::MaybeUninit::::uninit(); 2]; let mut curr = buf.len(); @@ -5465,7 +5684,7 @@ impl<'a, Y: YaxColors, F: fmt::Write> crate::long_mode::OperandVisitor for Relat self.out.write_char('+')?; // danger_anguished_string_write(&mut self.out, "+"); } - self.out.write_str("0x")?; + self.out.write_fixed_size("0x")?; // danger_anguished_string_write(self.out, "0x"); let mut buf = [core::mem::MaybeUninit::::uninit(); 8]; let mut curr = buf.len(); diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 41d6f2d..e6d0a02 100644 --- 
a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -8,6 +8,8 @@ pub use crate::MemoryAccessSize; #[cfg(feature = "fmt")] pub use self::display::{DisplayStyle, InstructionDisplayer}; +#[cfg(feature = "fmt")] +pub use self::display::{BigEnoughString, NoColorsSink, DisplaySink, TokenType}; use core::cmp::PartialEq; use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index 21b92e6..8b01461 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -62,9 +62,14 @@ fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str text, expected ); - /* - let mut text2 = String::new(); - instr.write_2(&mut text2); + let mut text2 = yaxpeax_x86::long_mode::BigEnoughString::new(); + let mut out = yaxpeax_x86::long_mode::NoColorsSink { + out: &mut text2, + }; + instr.write_to(&mut out); + core::mem::drop(out); + let text2 = text2.into_inner(); + assert!( text2 == text, "display error for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", @@ -74,7 +79,6 @@ fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str text2, text, ); - */ } else { eprintln!("non-fmt build cannot compare text equality") } From 2ac793524c4472887f60541a43ce74c57d4f22d1 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 13:13:31 -0700 Subject: [PATCH 16/95] figuring out how to handle short variable-size strings --- src/long_mode/display.rs | 463 +++++++++++++++++++++++++++++++-------- 1 file changed, 368 insertions(+), 95 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 6f4320b..c7247e7 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -379,6 +379,34 @@ pub trait DisplaySink: fmt::Write { } Ok(()) } + /// write a string to this sink that is less than 32 bytes. this is provided for optimization + /// opportunities when writing a variable-length string with known max size. 
+ /// + /// SAFETY: the provided `s` must be less than 32 bytes. if the provided string is longer than + /// 31 bytes, implementations may only copy part of a multi-byte codepoint while writing to a + /// utf-8 string. this may corrupt Rust strings. + unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.write_str(s) + + } + /// write a string to this sink that is less than 16 bytes. this is provided for optimization + /// opportunities when writing a variable-length string with known max size. + /// + /// SAFETY: the provided `s` must be less than 16 bytes. if the provided string is longer than + /// 15 bytes, implementations may only copy part of a multi-byte codepoint while writing to a + /// utf-8 string. this may corrupt Rust strings. + unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.write_str(s) + } + /// write a string to this sink that is less than 8 bytes. this is provided for optimization + /// opportunities when writing a variable-length string with known max size. + /// + /// SAFETY: the provided `s` must be less than 8 bytes. if the provided string is longer than + /// 7 bytes, implementations may only copy part of a multi-byte codepoint while writing to a + /// utf-8 string. this may corrupt Rust strings. 
+ unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.write_str(s) + } // fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error>; fn span_enter(&mut self, ty: TokenType); fn span_end(&mut self, ty: TokenType); @@ -429,8 +457,26 @@ pub struct BigEnoughString { // clearly states requirements impl fmt::Write for BigEnoughString { fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.content.write_str(s) + } + fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { + // SAFETY: TODO: goodness, what + unsafe { + let underlying = self.content.as_mut_vec(); + underlying.as_mut_ptr().offset(underlying.len() as isize).write(c as u8); + underlying.set_len(underlying.len() + 1); + } + Ok(()) + } +} + +// TODO: delete this whole thing? maybe? +impl DisplaySink for alloc::string::String { + unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { + self.reserve(s.len()); + // SAFETY: todo - let buf = unsafe { self.content.as_mut_vec() }; + let buf = unsafe { self.as_mut_vec() }; let new_bytes = s.as_bytes(); // should get DCE @@ -443,51 +489,223 @@ impl fmt::Write for BigEnoughString { } unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let mut rem = new_bytes.len() as isize; + + core::arch::asm!( + "6:", + "cmp {rem:e}, 16", + "jb 7f", + "mov {buf:r}, qword ptr [{src} + {rem} - 16]", + "mov qword ptr [{dest} + {rem} - 16], {buf:r}", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 16", + "jz 11f", + "7:", + "cmp {rem:e}, 8", + "jb 8f", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 8", + "jz 11f", + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], 
{buf:e}", + "sub {rem:e}, 4", + "jz 11f", + "9:", + "cmp {rem:e}, 2", + "jb 10f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 11f", + "10:", + "cmp {rem:e}, 1", + "jb 11f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "11:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, + buf = out(reg) _, + options(nostack), + ); - let mut rem = new_bytes.len() as isize; - unsafe { buf.set_len(buf.len() + new_bytes.len()); } /* - while rem % 4 > 0 { - dest.offset(rem - 1).write_unaligned(src.offset(rem - 1).read_unaligned()); - rem -= 1; + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); + } } + */ + + Ok(()) + } + unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { + self.reserve(s.len()); + + // SAFETY: todo + let buf = unsafe { self.as_mut_vec() }; + let new_bytes = s.as_bytes(); - while rem > 0 { - (dest.offset(rem - 4) as *mut u32).write_unaligned(unsafe { - *core::mem::transmute::<&u8, &u32>(&new_bytes[rem as usize - 4]) - }); - rem -= 4; + // should get DCE + if new_bytes.len() >= 16 { + unsafe { core::hint::unreachable_unchecked() } } - */ + // should get DCE + if new_bytes.len() == 0 { + unsafe { core::hint::unreachable_unchecked() } + } + unsafe { - /* - if rem >= 8 { - rem -= 8; - (dest.offset(rem) as *mut u64).write_unaligned((src.offset(rem) as *const u64).read_unaligned()) - } - if rem >= 4 { - rem -= 4; - (dest.offset(rem) as *mut u32).write_unaligned((src.offset(rem) as *const u32).read_unaligned()); - if rem == 0 { - return; - } - } - if rem >= 2 { - rem -= 2; - (dest.offset(rem) as *mut u16).write_unaligned((src.offset(rem) as *const u16).read_unaligned()); - if rem == 0 { - return; - } + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let mut rem = 
new_bytes.len() as isize; + + core::arch::asm!( + "7:", + "cmp {rem:e}, 8", + "jb 8f", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 8", + "jz 11f", + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 11f", + "9:", + "cmp {rem:e}, 2", + "jb 10f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 11f", + "10:", + "cmp {rem:e}, 1", + "jb 11f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "11:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, + buf = out(reg) _, + options(nostack), + ); + + buf.set_len(buf.len() + new_bytes.len()); + } + /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); } - if rem >= 1 { - rem -= 1; - (dest.offset(rem) as *mut u8).write_unaligned((src.offset(rem) as *const u8).read_unaligned()) + } + */ + + Ok(()) + } + unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { + self.reserve(s.len()); + + // SAFETY: todo + let buf = unsafe { self.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + // should get DCE + if new_bytes.len() >= 8 { + unsafe { core::hint::unreachable_unchecked() } + } + // should get DCE + if new_bytes.len() == 0 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let mut rem = new_bytes.len() as isize; + + core::arch::asm!( + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 11f", + "9:", + "cmp {rem:e}, 2", + "jb 10f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} 
+ {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 11f", + "10:", + "cmp {rem:e}, 1", + "jb 11f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "11:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, + buf = out(reg) _, + options(nostack), + ); + + buf.set_len(buf.len() + new_bytes.len()); + } + /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); } - */ + } + */ + + Ok(()) + } + fn span_enter(&mut self, ty: TokenType) {} + fn span_end(&mut self, ty: TokenType) {} +} + +impl DisplaySink for BigEnoughString { + unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { + // SAFETY: todo + let buf = unsafe { self.content.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + // should get DCE + if new_bytes.len() >= 32 { + unsafe { core::hint::unreachable_unchecked() } + } + // should get DCE + if new_bytes.len() == 0 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let mut rem = new_bytes.len() as isize; + core::arch::asm!( "6:", "cmp {rem:e}, 16", @@ -528,69 +746,139 @@ impl fmt::Write for BigEnoughString { src = in(reg) src, dest = in(reg) dest, rem = inout(reg) rem => _, -// tmp = out(reg) _, buf = out(reg) _, options(nostack), ); + + buf.set_len(buf.len() + new_bytes.len()); } /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); + } + } + */ + + Ok(()) + } + unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { + // SAFETY: todo + let buf = unsafe { self.content.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + // should get DCE + if new_bytes.len() >= 16 { + unsafe { core::hint::unreachable_unchecked() } + } + // should get DCE + if new_bytes.len() == 0 { + unsafe { 
core::hint::unreachable_unchecked() } + } + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let mut rem = new_bytes.len() as isize; + core::arch::asm!( "7:", - "cmp {rem:e}, 4", + "cmp {rem:e}, 8", "jb 8f", - "sub {rem:e}, 4", - "mov {buf:e}, dword ptr [{src} + {rem}]", - "mov dword ptr [{dest} + {rem}], {buf:e}", - "jmp 7b", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 8", + "jz 11f", "8:", - "test {rem:e}, {rem:e}", - "jz 10f", - "sub {rem:e}, 1", - "mov {buf:l}, byte ptr [{src} + {rem}]", - "mov byte ptr [{dest} + {rem}], {buf:l}", - "jnz 8b", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 11f", + "9:", + "cmp {rem:e}, 2", + "jb 10f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 11f", "10:", + "cmp {rem:e}, 1", + "jb 11f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "11:", src = in(reg) src, dest = in(reg) dest, - rem = in(reg) rem, -// tmp = out(reg) _, + rem = inout(reg) rem => _, buf = out(reg) _, options(nostack), ); + + buf.set_len(buf.len() + new_bytes.len()); } - */ /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); + } + } + */ + + Ok(()) + } + unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { + // SAFETY: todo + let buf = unsafe { self.content.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + // should get DCE + if new_bytes.len() >= 8 { + unsafe { core::hint::unreachable_unchecked() } + } + // should get DCE + if new_bytes.len() == 0 { + unsafe { core::hint::unreachable_unchecked() } + } + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + 
let mut rem = new_bytes.len() as isize; + core::arch::asm!( - "mov {tmp}, {rem}", - "and {tmp}, 3", - "je 3f", - "sub {rem}, {tmp}", - "2:", - "mov {buf:l}, byte ptr [{src}]", - "mov byte ptr [{dest}], {buf:l}", - "add {src}, 1", - "add {dest}, 1", - "sub {tmp}, 1", - "jnz 2b", - "3:", - "test {rem}, {rem}", - "jz 5f", - "4:", - "sub {rem}, 4", - "mov {buf:e}, dword ptr [{src} + {rem}]", - "mov dword ptr [{dest} + {rem}], {buf:e}", - "jnz 4b", - "5:", + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 11f", + "9:", + "cmp {rem:e}, 2", + "jb 10f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 11f", + "10:", + "cmp {rem:e}, 1", + "jb 11f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "11:", src = in(reg) src, dest = in(reg) dest, - rem = in(reg) rem, - tmp = out(reg) _, + rem = inout(reg) rem => _, buf = out(reg) _, + options(nostack), ); + + buf.set_len(buf.len() + new_bytes.len()); } - */ /* for i in 0..new_bytes.len() { unsafe { @@ -598,22 +886,9 @@ impl fmt::Write for BigEnoughString { } } */ - } Ok(()) } - fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { - // SAFETY: TODO: goodness, what - unsafe { - let underlying = self.content.as_mut_vec(); - underlying.as_mut_ptr().offset(underlying.len() as isize).write(c as u8); - underlying.set_len(underlying.len() + 1); - } - Ok(()) - } -} - -impl DisplaySink for BigEnoughString { fn span_enter(&mut self, ty: TokenType) {} fn span_end(&mut self, ty: TokenType) {} } @@ -702,18 +977,18 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_reg(&mut self, reg: RegSpec) -> Result { self.f.span_enter(TokenType::Register); - self.f.write_str(regspec_label(®))?; + unsafe { self.f.write_lt_8(regspec_label(®))?; } self.f.span_end(TokenType::Register); Ok(()) } fn 
visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { self.f.span_enter(TokenType::Register); - self.f.write_str(regspec_label(&spec))?; + unsafe { self.f.write_lt_8(regspec_label(&spec))?; } self.f.span_end(TokenType::Register); if mask.num != 0 { self.f.write_fixed_size("{")?; self.f.span_enter(TokenType::Register); - self.f.write_str(regspec_label(&mask))?; + unsafe { self.f.write_lt_8(regspec_label(&mask))?; } self.f.span_end(TokenType::Register); self.f.write_fixed_size("}")?; } @@ -726,7 +1001,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_str(regspec_label(&spec))?; if mask.num != 0 { self.f.write_fixed_size("{")?; - self.f.write_str(regspec_label(&mask))?; + unsafe { self.f.write_lt_8(regspec_label(&mask))?; } self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { @@ -780,7 +1055,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_char(':')?; } self.f.write_fixed_size("[")?; - self.f.write_str(regspec_label(®))?; + unsafe { self.f.write_lt_8(regspec_label(®))?; } self.f.write_fixed_size(" ")?; format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; self.f.write_fixed_size("]") @@ -795,7 +1070,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_char(':')?; } self.f.write_fixed_size("[")?; - self.f.write_str(regspec_label(®))?; + unsafe { self.f.write_lt_8(regspec_label(®))?; } self.f.write_fixed_size("]") } fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { @@ -3959,8 +4234,6 @@ impl Instruction { pub fn write_2(&self, out: &mut alloc::string::String) -> fmt::Result { use core::fmt::Write; - unsafe { out.as_mut_vec().reserve(64) }; - fn anguished_string_write(out: &mut alloc::string::String, label: &str) { let new_bytes = label.as_bytes(); let buf = unsafe { out.as_mut_vec() }; From 00dc2b64849328cc48c809163ad9fd105d8439aa Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 13:41:52 -0700 
Subject: [PATCH 17/95] helper to clear BigEnoughString --- src/long_mode/display.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index c7247e7..a49cbd0 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -894,6 +894,10 @@ impl DisplaySink for BigEnoughString { } impl BigEnoughString { + pub fn clear(&mut self) { + self.content.clear(); + } + pub fn into_inner(self) -> alloc::string::String { self.content } From 4af752a76094e921a614fa4bec0bcf7fae3d8f6f Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 13:42:10 -0700 Subject: [PATCH 18/95] a few more accurate hints --- src/long_mode/display.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index a49cbd0..b9c9f79 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -5358,7 +5358,8 @@ fn contextualize_intel(instr: &Instruction, colors: } } - out.write_str(instr.opcode.name())?; + // TODO: no x86 instruction longer than 32 bytes? + unsafe { out.write_lt_32(instr.opcode.name())? }; if instr.operand_count > 0 { out.write_fixed_size(" ")?; @@ -5940,7 +5941,8 @@ impl<'a, Y: YaxColors, F: DisplaySink> crate::long_mode::OperandVisitor for Rela core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) }; - self.out.write_str(s)?; + // not actually fixed size, but this should optimize right i hope.. + self.out.write_fixed_size(s)?; // anguished_string_write(&mut self.out, s); Ok(true) } else { @@ -5983,7 +5985,8 @@ impl<'a, Y: YaxColors, F: DisplaySink> crate::long_mode::OperandVisitor for Rela // danger_anguished_string_write(&mut self.out, s); // danger_anguished_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); - self.out.write_str(s)?; + // not actually fixed size, but this should optimize right i hope.. 
+ self.out.write_fixed_size(s)?; Ok(true) } else { Ok(false) From 49f54724a3b0b2497bd944f7638cb6e8ff1d18dc Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 13:52:03 -0700 Subject: [PATCH 19/95] less integer formatting in operands --- src/long_mode/display.rs | 160 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 153 insertions(+), 7 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index b9c9f79..e3f5b0a 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -927,49 +927,195 @@ struct ColorizingOperandVisitor<'a, T, Y> { f: &'a mut T, } +use core::mem::MaybeUninit; + impl crate::long_mode::OperandVisitor for ColorizingOperandVisitor<'_, T, Y> { type Ok = (); type Error = core::fmt::Error; fn visit_u8(&mut self, imm: u8) -> Result { self.f.span_enter(TokenType::Immediate); - write!(self.f, "{}", u8_hex(imm))?; + self.f.write_fixed_size("0x")?; + let mut buf = [MaybeUninit::::uninit(); 2]; + let mut curr = buf.len(); + let mut v = imm; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s: &str = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + self.f.write_fixed_size(s)?; self.f.span_end(TokenType::Immediate); Ok(()) } fn visit_i8(&mut self, imm: i8) -> Result { self.f.span_enter(TokenType::Immediate); - write!(self.f, "{}", signed_i8_hex(imm))?; + let mut v = imm as u8; + if imm < 0 { + self.f.write_char('-')?; + v = -imm as u8; + } + self.f.write_fixed_size("0x")?; + let mut buf = [core::mem::MaybeUninit::::uninit(); 2]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) + }; + + // not actually fixed size, but this should optimize 
right i hope.. + self.f.write_fixed_size(s)?; self.f.span_end(TokenType::Immediate); Ok(()) } fn visit_u16(&mut self, imm: u16) -> Result { self.f.span_enter(TokenType::Immediate); - write!(self.f, "{}", u16_hex(imm))?; + self.f.write_fixed_size("0x")?; + let mut buf = [MaybeUninit::::uninit(); 4]; + let mut curr = buf.len(); + let mut v = imm; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s: &str = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + self.f.write_fixed_size(s)?; self.f.span_end(TokenType::Immediate); Ok(()) } fn visit_i16(&mut self, imm: i16) -> Result { self.f.span_enter(TokenType::Immediate); - write!(self.f, "{}", signed_i16_hex(imm))?; + let mut v = imm as u16; + if imm < 0 { + self.f.write_char('-')?; + v = -imm as u16; + } + self.f.write_fixed_size("0x")?; + let mut buf = [core::mem::MaybeUninit::::uninit(); 4]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) + }; + + // not actually fixed size, but this should optimize right i hope.. 
+ self.f.write_fixed_size(s)?; self.f.span_end(TokenType::Immediate); Ok(()) } fn visit_u32(&mut self, imm: u32) -> Result { self.f.span_enter(TokenType::Immediate); - write!(self.f, "{}", u32_hex(imm))?; + self.f.write_fixed_size("0x")?; + let mut buf = [MaybeUninit::::uninit(); 8]; + let mut curr = buf.len(); + let mut v = imm; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s: &str = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + self.f.write_fixed_size(s)?; self.f.span_end(TokenType::Immediate); Ok(()) } fn visit_i32(&mut self, imm: i32) -> Result { self.f.span_enter(TokenType::Immediate); - write!(self.f, "{}", signed_i32_hex(imm))?; + let mut v = imm as u32; + if imm < 0 { + self.f.write_char('-')?; + v = -imm as u32; + } + self.f.write_fixed_size("0x")?; + let mut buf = [core::mem::MaybeUninit::::uninit(); 8]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) + }; + + // not actually fixed size, but this should optimize right i hope.. 
+ self.f.write_fixed_size(s)?; self.f.span_end(TokenType::Immediate); Ok(()) } fn visit_u64(&mut self, imm: u64) -> Result { self.f.span_enter(TokenType::Immediate); - write!(self.f, "{}", u64_hex(imm))?; + self.f.write_fixed_size("0x")?; + let mut buf = [MaybeUninit::::uninit(); 16]; + let mut curr = buf.len(); + let mut v = imm; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s: &str = unsafe { + core::mem::transmute::<&[MaybeUninit], &str>(buf) + }; + self.f.write_fixed_size(s)?; self.f.span_end(TokenType::Immediate); Ok(()) } From 758ddc604b6300c48ae14dbdc8d48720124658f2 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 13:59:33 -0700 Subject: [PATCH 20/95] move away from fmt for visit_i64 and displacements too --- src/long_mode/display.rs | 56 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index e3f5b0a..0e09173 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1121,7 +1121,31 @@ impl crate::long_mode::OperandVisitor for Coloriz } fn visit_i64(&mut self, imm: i64) -> Result { self.f.span_enter(TokenType::Immediate); - write!(self.f, "{}", signed_i64_hex(imm))?; + let mut v = imm as u64; + if imm < 0 { + self.f.write_char('-')?; + v = -imm as u64; + } + self.f.write_fixed_size("0x")?; + let mut buf = [core::mem::MaybeUninit::::uninit(); 64]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) + }; + + // not actually fixed size, but this should optimize right i hope.. 
+ self.f.write_fixed_size(s)?; self.f.span_end(TokenType::Immediate); Ok(()) } @@ -1207,7 +1231,35 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(®))?; } self.f.write_fixed_size(" ")?; - format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + + { + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; + } + let mut buf = [core::mem::MaybeUninit::::uninit(); 8]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) + }; + + // not actually fixed size, but this should optimize right i hope.. + self.f.write_fixed_size(s)?; + } self.f.write_fixed_size("]") } fn visit_deref(&mut self, reg: RegSpec) -> Result { From 4142a4a16b62ce9f80d796e930518c169c52d587 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 14:10:32 -0700 Subject: [PATCH 21/95] move non-avx512 operand printing away from fmt --- src/long_mode/display.rs | 106 +++++++++++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 21 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 0e09173..46227fb 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1284,10 +1284,13 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_char(name[1] as char)?; self.f.write_char(':')?; } - write!(self.f, "[{} * {}]", - regspec_label(®), - self.colors.number(scale) - ) + self.f.write_fixed_size("[")?; + unsafe { self.f.write_lt_8(regspec_label(®))?; } + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size("]")?; + + Ok(()) } fn 
visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; @@ -1298,11 +1301,40 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_char(name[1] as char)?; self.f.write_char(':')?; } - write!(self.f, "[{} * {} ", - regspec_label(®), - self.colors.number(scale), - )?; - format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + self.f.write_fixed_size("[")?; + unsafe { self.f.write_lt_8(regspec_label(®))?; } + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size(" ")?; + + { + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; + } + let mut buf = [core::mem::MaybeUninit::::uninit(); 8]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) + }; + + // not actually fixed size, but this should optimize right i hope.. 
+ self.f.write_fixed_size(s)?; + } write!(self.f, "]") } fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { @@ -1314,11 +1346,13 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_char(name[1] as char)?; self.f.write_char(':')?; } - write!(self.f, "[{} + {} * {}]", - regspec_label(&base), - regspec_label(&index), - self.colors.number(scale) - ) + self.f.write_fixed_size("[")?; + unsafe { self.f.write_lt_8(regspec_label(&base))?; } + self.f.write_fixed_size(" + ")?; + unsafe { self.f.write_lt_8(regspec_label(&index))?; } + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size("]") } fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; @@ -1329,13 +1363,43 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_char(name[1] as char)?; self.f.write_char(':')?; } - write!(self.f, "[{} + {} * {} ", - regspec_label(&base), - regspec_label(&index), - self.colors.number(scale), - )?; - format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(self.f, "]") + self.f.write_fixed_size("[")?; + unsafe { self.f.write_lt_8(regspec_label(&base))?; } + self.f.write_fixed_size(" + ")?; + unsafe { self.f.write_lt_8(regspec_label(&index))?; } + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size(" ")?; + + { + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; + } + let mut buf = [core::mem::MaybeUninit::::uninit(); 8]; + let mut curr = buf.len(); + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + buf[curr].write(c); + v = v / 16; + if v == 0 { + 
break; + } + } + let buf = &buf[curr..]; + let s = unsafe { + core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) + }; + + // not actually fixed size, but this should optimize right i hope.. + self.f.write_fixed_size(s)?; + } + self.f.write_fixed_size("]") } fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; From 53012e24ee9179a911cc6a3a8c4dc30cb9906c70 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 14:25:48 -0700 Subject: [PATCH 22/95] mem size strings are all 7b or less --- src/long_mode/display.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 46227fb..d7a80cb 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1198,7 +1198,7 @@ impl crate::long_mode::OperandVisitor for Coloriz Ok(()) } fn visit_abs_u32(&mut self, imm: u32) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1209,7 +1209,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "[{}]", self.colors.address(u32_hex(imm))) } fn visit_abs_u64(&mut self, imm: u64) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? 
}; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1220,7 +1220,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "[{}]", self.colors.address(u64_hex(imm))) } fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1263,7 +1263,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_fixed_size("]") } fn visit_deref(&mut self, reg: RegSpec) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1276,7 +1276,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_fixed_size("]") } fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1293,7 +1293,7 @@ impl crate::long_mode::OperandVisitor for Coloriz Ok(()) } fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? 
}; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1338,7 +1338,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "]") } fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1355,7 +1355,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_fixed_size("]") } fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1402,7 +1402,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_fixed_size("]") } fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; self.f.write_fixed_size(" ")?; write!(self.f, "[{} ", regspec_label(&spec))?; format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; @@ -1410,7 +1410,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? 
}; self.f.write_fixed_size(" ")?; self.f.write_fixed_size("[")?; self.f.write_str(regspec_label(&spec))?; @@ -1418,7 +1418,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; self.f.write_fixed_size(" ")?; write!(self.f, "[{} * {}]", regspec_label(&spec), @@ -1427,7 +1427,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; self.f.write_fixed_size(" ")?; write!(self.f, "[{} * {} ", regspec_label(&spec), @@ -1438,7 +1438,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; self.f.write_fixed_size(" ")?; self.f.write_fixed_size("[")?; self.f.write_str(regspec_label(&base))?; @@ -1448,7 +1448,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? 
}; self.f.write_fixed_size(" ")?; write!(self.f, "[{} + {} ", regspec_label(&base), @@ -1459,7 +1459,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; self.f.write_fixed_size(" ")?; write!(self.f, "[{} + {} * {}]", regspec_label(&base), @@ -1469,7 +1469,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { - self.f.write_str(MEM_SIZE_STRINGS[self.instr.mem_size as usize])?; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; self.f.write_fixed_size(" ")?; write!(self.f, "[{} + {} * {} ", regspec_label(&base), From 166695d09ab3c30a0147ac74aa856bae1f5542a8 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 14:58:05 -0700 Subject: [PATCH 23/95] write_fixed_size impls for string and BigEnoughString --- src/long_mode/display.rs | 69 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index d7a80cb..575765c 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -371,14 +371,10 @@ pub enum TokenType { } pub trait DisplaySink: fmt::Write { - // /// may be optimized for writing strings of variable length. - // fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error>; fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { - for c in s.as_bytes().iter() { - self.write_char(*c as char)?; - } - Ok(()) + self.write_str(s) } + /// write a string to this sink that is less than 32 bytes. 
this is provided for optimization /// opportunities when writing a variable-length string with known max size. /// @@ -472,6 +468,37 @@ impl fmt::Write for BigEnoughString { // TODO: delete this whole thing? maybe? impl DisplaySink for alloc::string::String { + fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.reserve(s.len()); + let buf = unsafe { self.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + if new_bytes.len() == 0 { + unsafe { core::hint::unreachable_unchecked() } + } + + if new_bytes.len() >= 16 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let mut rem = new_bytes.len() as isize; + + dest.offset(0 as isize).write(new_bytes[0]); + for i in 1..new_bytes.len() { + unsafe { + dest.offset(i as isize).write(new_bytes[i]); + } + } + + buf.set_len(buf.len() + new_bytes.len()); + } + + Ok(()) + } unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { self.reserve(s.len()); @@ -686,6 +713,36 @@ impl DisplaySink for alloc::string::String { } impl DisplaySink for BigEnoughString { + fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { + let buf = unsafe { self.content.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + if new_bytes.len() == 0 { + unsafe { core::hint::unreachable_unchecked() } + } + + if new_bytes.len() >= 16 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let mut rem = new_bytes.len() as isize; + + dest.offset(0 as isize).write(new_bytes[0]); + for i in 1..new_bytes.len() { + unsafe { + dest.offset(i as isize).write(new_bytes[i]); + } + } + + buf.set_len(buf.len() + new_bytes.len()); + } + + Ok(()) + } unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { // SAFETY: todo let buf = unsafe { self.content.as_mut_vec() }; From 
4fb654284c9dcd2409300b58b11f3a6906d7e4f8 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 15:25:28 -0700 Subject: [PATCH 24/95] actually use small-string specializations when available --- src/long_mode/display.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 575765c..cf2edae 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -5646,17 +5646,12 @@ impl Instruction { } pub fn write_to(&self, out: &mut T) -> fmt::Result { - self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) + contextualize_intel(self, &NoColors, 0, Some(&NoContext), out) +// self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) } } -fn contextualize_intel(instr: &Instruction, colors: &Y, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { - let mut out = NoColorsSink { - out, - }; - let mut out = &mut out; - use core::fmt::Write; - +fn contextualize_intel(instr: &Instruction, colors: &Y, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { if instr.xacquire() { out.write_fixed_size("xacquire ")?; } @@ -6107,6 +6102,11 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual match style { DisplayStyle::Intel => { + let mut out = NoColorsSink { + out, + }; + let mut out = &mut out; + contextualize_intel(instr, colors, address, context, out) } DisplayStyle::C => { From 754e0da83bbb58df324c8c6dfa87df4c1b8216b4 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 15:47:22 -0700 Subject: [PATCH 25/95] looks like that becomes memcpy, not ideal --- src/long_mode/display.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index cf2edae..903809c 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1315,7 +1315,7 @@ impl crate::long_mode::OperandVisitor for 
Coloriz }; // not actually fixed size, but this should optimize right i hope.. - self.f.write_fixed_size(s)?; + unsafe { self.f.write_lt_16(s)?; } } self.f.write_fixed_size("]") } @@ -1390,7 +1390,7 @@ impl crate::long_mode::OperandVisitor for Coloriz }; // not actually fixed size, but this should optimize right i hope.. - self.f.write_fixed_size(s)?; + unsafe { self.f.write_lt_16(s)?; } } write!(self.f, "]") } @@ -1454,7 +1454,7 @@ impl crate::long_mode::OperandVisitor for Coloriz }; // not actually fixed size, but this should optimize right i hope.. - self.f.write_fixed_size(s)?; + unsafe { self.f.write_lt_16(s)?; } } self.f.write_fixed_size("]") } From 93145713adafffff8f4058bef7b4b37fa55406f4 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 16:42:15 -0700 Subject: [PATCH 26/95] avoid intermediate buffer and copy of hex-formatted ints --- src/long_mode/display.rs | 142 +++++++++++++++++++++++++++++++++------ 1 file changed, 123 insertions(+), 19 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 903809c..c7f0f4d 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -403,6 +403,31 @@ pub trait DisplaySink: fmt::Write { unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), core::fmt::Error> { self.write_str(s) } + + /// write a u8 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { + write!(self, "{:x}", v) + } + /// write a u16 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { + write!(self, "{:x}", v) + } + /// write a u32 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> { + write!(self, "{:x}", v) + } // fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error>; fn span_enter(&mut self, ty: TokenType); fn span_end(&mut self, ty: TokenType); @@ -708,6 +733,55 @@ impl DisplaySink for alloc::string::String { Ok(()) } + /// write a u8 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { + use core::fmt::Write; + write!(self, "{:x}", v) + } + /// write a u16 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { + use core::fmt::Write; + write!(self, "{:x}", v) + } + /// write a u32 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = (((32 - v.leading_zeros()) >> 2) + 1) as usize; + self.reserve(printed_size); + + unsafe { + let buf = unsafe { self.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + p.offset(curr as isize).write(c); + v = v / 16; + if v == 0 { + break; + } + } + + buf.set_len(buf.len() + printed_size); + } + + Ok(()) + } fn span_enter(&mut self, ty: TokenType) {} fn span_end(&mut self, ty: TokenType) {} } @@ -946,6 +1020,54 @@ impl DisplaySink for BigEnoughString { Ok(()) } + /// write a u8 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { + use core::fmt::Write; + write!(self, "{:x}", v) + } + /// write a u16 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { + use core::fmt::Write; + write!(self, "{:x}", v) + } + /// write a u32 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = (((32 - v.leading_zeros()) >> 2) + 1) as usize; + + unsafe { + let buf = unsafe { self.content.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + p.offset(curr as isize).write(c); + v = v / 16; + if v == 0 { + break; + } + } + + buf.set_len(buf.len() + printed_size); + } + + Ok(()) + } fn span_enter(&mut self, ty: TokenType) {} fn span_end(&mut self, ty: TokenType) {} } @@ -1297,25 +1419,7 @@ impl crate::long_mode::OperandVisitor for Coloriz } else { self.f.write_fixed_size("+ 0x")?; } - let mut buf = [core::mem::MaybeUninit::::uninit(); 8]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) - }; - - // not actually fixed size, but this should optimize right i hope.. 
- unsafe { self.f.write_lt_16(s)?; } + self.f.write_u32(v)?; } self.f.write_fixed_size("]") } From bebba5add1ea460db29cd0268f3365fae3cabbdd Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 17:01:58 -0700 Subject: [PATCH 27/95] slightly more centralized hex formatting --- src/long_mode/display.rs | 48 +++++++--------------------------------- 1 file changed, 8 insertions(+), 40 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index c7f0f4d..6d3be36 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1385,7 +1385,11 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_char(name[1] as char)?; self.f.write_char(':')?; } - write!(self.f, "[{}]", self.colors.address(u32_hex(imm))) + self.f.write_fixed_size("[")?; + self.f.write_fixed_size("0x")?; + self.f.write_u32(imm)?; + self.f.write_fixed_size("]")?; + Ok(()) } fn visit_abs_u64(&mut self, imm: u64) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; @@ -1476,27 +1480,9 @@ impl crate::long_mode::OperandVisitor for Coloriz } else { self.f.write_fixed_size("+ 0x")?; } - let mut buf = [core::mem::MaybeUninit::::uninit(); 8]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) - }; - - // not actually fixed size, but this should optimize right i hope.. - unsafe { self.f.write_lt_16(s)?; } + self.f.write_u32(v); } - write!(self.f, "]") + self.f.write_char(']') } fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? 
}; @@ -1540,25 +1526,7 @@ impl crate::long_mode::OperandVisitor for Coloriz } else { self.f.write_fixed_size("+ 0x")?; } - let mut buf = [core::mem::MaybeUninit::::uninit(); 8]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) - }; - - // not actually fixed size, but this should optimize right i hope.. - unsafe { self.f.write_lt_16(s)?; } + self.f.write_u32(v)?; } self.f.write_fixed_size("]") } From 514586f65bf493b5b48aaf7208f9b381b9293eb7 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 17:09:26 -0700 Subject: [PATCH 28/95] write_fixed_size really should always be inlined... --- src/long_mode/display.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 6d3be36..99d4f91 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -371,6 +371,7 @@ pub enum TokenType { } pub trait DisplaySink: fmt::Write { + #[inline(always)] fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { self.write_str(s) } @@ -493,6 +494,7 @@ impl fmt::Write for BigEnoughString { // TODO: delete this whole thing? maybe? 
impl DisplaySink for alloc::string::String { + #[inline(always)] fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { self.reserve(s.len()); let buf = unsafe { self.as_mut_vec() }; @@ -787,6 +789,7 @@ impl DisplaySink for alloc::string::String { } impl DisplaySink for BigEnoughString { + #[inline(always)] fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { let buf = unsafe { self.content.as_mut_vec() }; let new_bytes = s.as_bytes(); From 0717863be4d7af2dff0d680dc61ecdcd02626101 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 17:19:36 -0700 Subject: [PATCH 29/95] visit_disp is called in only two places, is tiny.. From afc361c6a9d797a4ca9ffc913079082779984a83 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 17:25:07 -0700 Subject: [PATCH 30/95] use get_kinda_unchecked for mem size strings if mem_size is ever out of bounds thats a severe bug on its own --- src/long_mode/display.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 99d4f91..fce1855 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1380,7 +1380,7 @@ impl crate::long_mode::OperandVisitor for Coloriz Ok(()) } fn visit_abs_u32(&mut self, imm: u32) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1395,7 +1395,7 @@ impl crate::long_mode::OperandVisitor for Coloriz Ok(()) } fn visit_abs_u64(&mut self, imm: u64) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1406,7 +1406,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "[{}]", self.colors.address(u64_hex(imm))) } fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1431,7 +1431,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_fixed_size("]") } fn visit_deref(&mut self, reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1444,7 +1444,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_fixed_size("]") } fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1461,7 +1461,7 @@ impl crate::long_mode::OperandVisitor for Coloriz Ok(()) } fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1488,7 +1488,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_char(']') } fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1505,7 +1505,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_fixed_size("]") } fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); @@ -1534,7 +1534,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_fixed_size("]") } fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; write!(self.f, "[{} ", regspec_label(&spec))?; format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; @@ -1542,7 +1542,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? 
}; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; self.f.write_fixed_size("[")?; self.f.write_str(regspec_label(&spec))?; @@ -1550,7 +1550,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; write!(self.f, "[{} * {}]", regspec_label(&spec), @@ -1559,7 +1559,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; write!(self.f, "[{} * {} ", regspec_label(&spec), @@ -1570,7 +1570,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; self.f.write_fixed_size(" ")?; self.f.write_fixed_size("[")?; self.f.write_str(regspec_label(&base))?; @@ -1580,7 +1580,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; write!(self.f, "[{} + {} ", regspec_label(&base), @@ -1591,7 +1591,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; write!(self.f, "[{} + {} * {}]", regspec_label(&base), @@ -1601,7 +1601,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS[self.instr.mem_size as usize])? }; + unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; self.f.write_fixed_size(" ")?; write!(self.f, "[{} + {} * {} ", regspec_label(&base), From 95c3f4831af654b932e55290540e1435a7e46341 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 17:41:04 -0700 Subject: [PATCH 31/95] use specialized write helpers for register labels --- src/long_mode/display.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index fce1855..ddffae2 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1354,7 +1354,7 @@ impl crate::long_mode::OperandVisitor for Coloriz Ok(()) } fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: crate::long_mode::SaeMode) -> Result { - self.f.write_str(regspec_label(&spec))?; + unsafe { self.f.write_lt_8(regspec_label(&spec))?; } if mask.num != 0 { self.f.write_fixed_size("{")?; unsafe { self.f.write_lt_8(regspec_label(&mask))?; } @@ -1363,11 +1363,11 @@ impl crate::long_mode::OperandVisitor for Coloriz if let MergeMode::Zero = merge_mode { self.f.write_fixed_size("{z}")?; } - self.f.write_str(sae_mode.label())?; + unsafe { self.f.write_lt_16(sae_mode.label())?; } Ok(()) } fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { - self.f.write_str(regspec_label(&spec))?; + unsafe { self.f.write_lt_8(regspec_label(&spec))?; } if mask.num != 0 { self.f.write_fixed_size("{")?; self.f.write_str(regspec_label(&mask))?; @@ -1536,7 +1536,9 @@ impl crate::long_mode::OperandVisitor for Coloriz fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; self.f.write_fixed_size(" ")?; - write!(self.f, "[{} ", regspec_label(&spec))?; + self.f.write_char('[')?; + unsafe { self.f.write_lt_8(regspec_label(&spec))?; } + self.f.write_char(' ')?; format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; write!(self.f, "]")?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) @@ -1545,7 +1547,7 @@ impl crate::long_mode::OperandVisitor for Coloriz unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; self.f.write_fixed_size("[")?; - self.f.write_str(regspec_label(&spec))?; + unsafe { self.f.write_lt_8(regspec_label(&spec))?; } self.f.write_fixed_size("]")?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } @@ -1573,9 +1575,9 @@ impl crate::long_mode::OperandVisitor for Coloriz unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; self.f.write_fixed_size("[")?; - self.f.write_str(regspec_label(&base))?; + unsafe { self.f.write_lt_8(regspec_label(&base))?; } self.f.write_fixed_size(" + ")?; - self.f.write_str(regspec_label(&index))?; + unsafe { self.f.write_lt_8(regspec_label(&index))?; } self.f.write_fixed_size("]")?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } From 8de036c2ac6777220a2d1d755036896ab295f7bb Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 17:49:54 -0700 Subject: [PATCH 32/95] use specialized printers for immediate operands --- src/long_mode/display.rs | 189 +++++++++++++++++++++------------------ 1 file changed, 103 insertions(+), 86 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index ddffae2..95511f9 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -740,18 +740,63 @@ impl DisplaySink for alloc::string::String { /// this is provided for optimization opportunities when the formatted integer can be written /// directly to the sink (rather than 
formatted to an intermediate buffer and output as a /// followup step) - fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { - use core::fmt::Write; - write!(self, "{:x}", v) + fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = (((8 - v.leading_zeros()) >> 2) + 1) as usize; + self.reserve(printed_size); + + unsafe { + let buf = unsafe { self.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + p.offset(curr as isize).write(c); + v = v / 16; + if v == 0 { + break; + } + } + + buf.set_len(buf.len() + printed_size); + } + + Ok(()) } /// write a u16 to the output as a base-16 integer. /// /// this is provided for optimization opportunities when the formatted integer can be written /// directly to the sink (rather than formatted to an intermediate buffer and output as a /// followup step) - fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { - use core::fmt::Write; - write!(self, "{:x}", v) + fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = (((16 - v.leading_zeros()) >> 2) + 1) as usize; + self.reserve(printed_size); + + unsafe { + let buf = unsafe { self.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + p.offset(curr as isize).write(c); + v = v / 16; + if v == 0 { + break; + } + } + + buf.set_len(buf.len() + printed_size); + } + + Ok(()) + } /// write a u32 to the output as a base-16 integer. 
/// @@ -1028,18 +1073,60 @@ impl DisplaySink for BigEnoughString { /// this is provided for optimization opportunities when the formatted integer can be written /// directly to the sink (rather than formatted to an intermediate buffer and output as a /// followup step) - fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { - use core::fmt::Write; - write!(self, "{:x}", v) + fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = (((8 - v.leading_zeros()) >> 2) + 1) as usize; + + unsafe { + let buf = unsafe { self.content.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + p.offset(curr as isize).write(c); + v = v / 16; + if v == 0 { + break; + } + } + + buf.set_len(buf.len() + printed_size); + } + + Ok(()) } /// write a u16 to the output as a base-16 integer. /// /// this is provided for optimization opportunities when the formatted integer can be written /// directly to the sink (rather than formatted to an intermediate buffer and output as a /// followup step) - fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { - use core::fmt::Write; - write!(self, "{:x}", v) + fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. 
+ let printed_size = (((16 - v.leading_zeros()) >> 2) + 1) as usize; + + unsafe { + let buf = unsafe { self.content.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + p.offset(curr as isize).write(c); + v = v / 16; + if v == 0 { + break; + } + } + + buf.set_len(buf.len() + printed_size); + } + + Ok(()) } /// write a u32 to the output as a base-16 integer. /// @@ -1172,24 +1259,7 @@ impl crate::long_mode::OperandVisitor for Coloriz fn visit_u16(&mut self, imm: u16) -> Result { self.f.span_enter(TokenType::Immediate); self.f.write_fixed_size("0x")?; - let mut buf = [MaybeUninit::::uninit(); 4]; - let mut curr = buf.len(); - let mut v = imm; - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s: &str = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - self.f.write_fixed_size(s)?; + self.f.write_u16(imm)?; self.f.span_end(TokenType::Immediate); Ok(()) } @@ -1201,49 +1271,14 @@ impl crate::long_mode::OperandVisitor for Coloriz v = -imm as u16; } self.f.write_fixed_size("0x")?; - let mut buf = [core::mem::MaybeUninit::::uninit(); 4]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) - }; - - // not actually fixed size, but this should optimize right i hope.. 
- self.f.write_fixed_size(s)?; + self.f.write_u16(v)?; self.f.span_end(TokenType::Immediate); Ok(()) } fn visit_u32(&mut self, imm: u32) -> Result { self.f.span_enter(TokenType::Immediate); self.f.write_fixed_size("0x")?; - let mut buf = [MaybeUninit::::uninit(); 8]; - let mut curr = buf.len(); - let mut v = imm; - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s: &str = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - self.f.write_fixed_size(s)?; + self.f.write_u32(imm)?; self.f.span_end(TokenType::Immediate); Ok(()) } @@ -1255,25 +1290,7 @@ impl crate::long_mode::OperandVisitor for Coloriz v = -imm as u32; } self.f.write_fixed_size("0x")?; - let mut buf = [core::mem::MaybeUninit::::uninit(); 8]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) - }; - - // not actually fixed size, but this should optimize right i hope.. 
- self.f.write_fixed_size(s)?; + self.f.write_u32(v)?; self.f.span_end(TokenType::Immediate); Ok(()) } From 1506b8fc0f58a7b1eb91009302b79750ff7cbc8b Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 17:53:31 -0700 Subject: [PATCH 33/95] use hex printer helpers for relative offsets too --- src/long_mode/display.rs | 46 ++-------------------------------------- 1 file changed, 2 insertions(+), 44 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 95511f9..10f1db8 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -6336,27 +6336,7 @@ impl<'a, Y: YaxColors, F: DisplaySink> crate::long_mode::OperandVisitor for Rela // danger_anguished_string_write(&mut self.out, "+"); } self.out.write_fixed_size("0x")?; - // danger_anguished_string_write(self.out, "0x"); - let mut buf = [core::mem::MaybeUninit::::uninit(); 2]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) - }; - - // not actually fixed size, but this should optimize right i hope.. 
- self.out.write_fixed_size(s)?; -// anguished_string_write(&mut self.out, s); + self.out.write_u8(v)?; Ok(true) } else { Ok(false) @@ -6377,29 +6357,7 @@ impl<'a, Y: YaxColors, F: DisplaySink> crate::long_mode::OperandVisitor for Rela // danger_anguished_string_write(&mut self.out, "+"); } self.out.write_fixed_size("0x")?; - // danger_anguished_string_write(self.out, "0x"); - let mut buf = [core::mem::MaybeUninit::::uninit(); 8]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) - }; - -// danger_anguished_string_write(&mut self.out, s); - - // danger_anguished_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); - // not actually fixed size, but this should optimize right i hope.. - self.out.write_fixed_size(s)?; + self.out.write_u32(v)?; Ok(true) } else { Ok(false) From eff863b487198106f55d459024d91f9b44ea761f Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 21:43:49 -0700 Subject: [PATCH 34/95] whats it do without the unused colors parameter --- src/long_mode/display.rs | 123 +++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 51 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 10f1db8..a303872 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1189,16 +1189,15 @@ impl BigEnoughString { } } -struct ColorizingOperandVisitor<'a, T, Y> { +struct ColorizingOperandVisitor<'a, T> { instr: &'a Instruction, op_nr: u8, - colors: &'a Y, f: &'a mut T, } use core::mem::MaybeUninit; -impl crate::long_mode::OperandVisitor for ColorizingOperandVisitor<'_, T, Y> { +impl crate::long_mode::OperandVisitor for ColorizingOperandVisitor<'_, T> { type Ok = (); type Error = core::fmt::Error; @@ -1420,7 +1419,7 @@ impl 
crate::long_mode::OperandVisitor for Coloriz self.f.write_char(name[1] as char)?; self.f.write_char(':')?; } - write!(self.f, "[{}]", self.colors.address(u64_hex(imm))) + write!(self.f, "[{}]", u64_hex(imm)) } fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; @@ -1556,7 +1555,14 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_char('[')?; unsafe { self.f.write_lt_8(regspec_label(&spec))?; } self.f.write_char(' ')?; - format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; + } + self.f.write_u32(v)?; write!(self.f, "]")?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } @@ -1573,7 +1579,7 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_fixed_size(" ")?; write!(self.f, "[{} * {}]", regspec_label(&spec), - self.colors.number(scale) + scale )?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } @@ -1582,9 +1588,16 @@ impl crate::long_mode::OperandVisitor for Coloriz self.f.write_fixed_size(" ")?; write!(self.f, "[{} * {} ", regspec_label(&spec), - self.colors.number(scale), + scale, )?; - format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; + } + self.f.write_u32(v)?; write!(self.f, "]")?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } @@ -1605,7 +1618,14 @@ impl crate::long_mode::OperandVisitor for Coloriz regspec_label(&base), regspec_label(&index), )?; - format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + 
self.f.write_fixed_size("+ 0x")?; + } + self.f.write_u32(v)?; write!(self.f, "]")?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } @@ -1615,7 +1635,7 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "[{} + {} * {}]", regspec_label(&base), regspec_label(&index), - self.colors.number(scale) + scale )?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } @@ -1625,9 +1645,16 @@ impl crate::long_mode::OperandVisitor for Coloriz write!(self.f, "[{} + {} * {} ", regspec_label(&base), regspec_label(&index), - self.colors.number(scale), + scale, )?; - format_number_i32(self.colors, self.f, disp, NumberStyleHint::HexSignedWithSignSplit)?; + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; + } + self.f.write_u32(v)?; write!(self.f, "]")?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } @@ -5740,12 +5767,12 @@ impl Instruction { } pub fn write_to(&self, out: &mut T) -> fmt::Result { - contextualize_intel(self, &NoColors, 0, Some(&NoContext), out) + contextualize_intel(self, 0, Some(&NoContext), out) // self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) } } -fn contextualize_intel(instr: &Instruction, colors: &Y, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { +fn contextualize_intel(instr: &Instruction, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { if instr.xacquire() { out.write_fixed_size("xacquire ")?; } @@ -5774,7 +5801,6 @@ fn contextualize_intel(instr: &Instruction, colors if instr.visit_operand(0, &mut RelativeBranchPrinter { inst: instr, - colors, out, })? 
{ return Ok(()); @@ -5784,7 +5810,6 @@ fn contextualize_intel(instr: &Instruction, colors instr, op_nr: 0, f: out, - colors, }; instr.visit_operand(0 as u8, &mut displayer)?; @@ -5796,7 +5821,6 @@ fn contextualize_intel(instr: &Instruction, colors instr, op_nr: i, f: out, - colors, }; instr.visit_operand(i as u8, &mut displayer)?; if let Some(evex) = instr.prefixes.evex() { @@ -5853,7 +5877,7 @@ fn contextualize_intel(instr: &Instruction, colors Ok(()) } -fn contextualize_c(instr: &Instruction, colors: &Y, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { +fn contextualize_c(instr: &Instruction, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { let mut brace_count = 0; let mut prefixed = false; @@ -5899,20 +5923,20 @@ fn contextualize_c(instr: &Instruction, colors: &Y, } } - fn write_jmp_operand(op: Operand, colors: &Y, out: &mut T) -> fmt::Result { + fn write_jmp_operand(op: Operand, out: &mut T) -> fmt::Result { match op { Operand::ImmediateI8(rel) => { if rel >= 0 { - write!(out, "$+{}", colors.number(signed_i32_hex(rel as i32))) + write!(out, "$+{}", (signed_i32_hex(rel as i32))) } else { - write!(out, "${}", colors.number(signed_i32_hex(rel as i32))) + write!(out, "${}", (signed_i32_hex(rel as i32))) } } Operand::ImmediateI32(rel) => { if rel >= 0 { - write!(out, "$+{}", colors.number(signed_i32_hex(rel))) + write!(out, "$+{}", (signed_i32_hex(rel))) } else { - write!(out, "${}", colors.number(signed_i32_hex(rel))) + write!(out, "${}", (signed_i32_hex(rel))) } } other => { @@ -6076,87 +6100,87 @@ fn contextualize_c(instr: &Instruction, colors: &Y, } Opcode::JMP => { out.write_str("jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JRCXZ => { out.write_str("if rcx == 0 then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::LOOP => { out.write_str("rcx--; if rcx != 0 then jmp ")?; 
- write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::LOOPZ => { out.write_str("rcx--; if rcx != 0 and zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::LOOPNZ => { out.write_str("rcx--; if rcx != 0 and !zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JO => { out.write_str("if _(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNO => { out.write_str("if _(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JB => { out.write_str("if /* unsigned */ below(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNB => { out.write_str("if /* unsigned */ above_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JZ => { out.write_str("if zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNZ => { out.write_str("if !zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNA => { out.write_str("if /* unsigned */ below_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JA => { out.write_str("if /* unsigned */ above(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JS => { out.write_str("if signed(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + 
write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNS => { out.write_str("if !signed(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JP => { out.write_str("if parity(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNP => { out.write_str("if !parity(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JL => { out.write_str("if /* signed */ less(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JGE => { out.write_str("if /* signed */ greater_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JLE => { out.write_str("if /* signed */ less_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JG => { out.write_str("if /* signed */ greater(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::NOP => { write!(out, "nop")?; @@ -6201,10 +6225,10 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual }; let mut out = &mut out; - contextualize_intel(instr, colors, address, context, out) + contextualize_intel(instr, address, context, out) } DisplayStyle::C => { - contextualize_c(instr, colors, address, context, out) + contextualize_c(instr, address, context, out) } } } @@ -6255,7 +6279,6 @@ impl ShowContextual ShowContextual { +struct RelativeBranchPrinter<'a, F: DisplaySink> { inst: &'a Instruction, - colors: &'a Y, out: &'a mut F, } -impl<'a, Y: YaxColors, F: DisplaySink> crate::long_mode::OperandVisitor for RelativeBranchPrinter<'a, Y, F> { +impl<'a, F: DisplaySink> crate::long_mode::OperandVisitor 
for RelativeBranchPrinter<'a, F> { // return true if we printed a relative branch offset, false otherwise type Ok = bool; // but errors are errors From 2475794b7042fea9e8e3d637a5b3787a0939e89e Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 21:56:59 -0700 Subject: [PATCH 35/95] move avx512 operand printing off of fmt --- src/long_mode/display.rs | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index a303872..4f80950 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1386,7 +1386,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi unsafe { self.f.write_lt_8(regspec_label(&spec))?; } if mask.num != 0 { self.f.write_fixed_size("{")?; - self.f.write_str(regspec_label(&mask))?; + unsafe { self.f.write_lt_8(regspec_label(&mask))?; } self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { @@ -1563,7 +1563,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.write_fixed_size("+ 0x")?; } self.f.write_u32(v)?; - write!(self.f, "]")?; + self.f.write_char(']')?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { @@ -1585,11 +1585,11 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; + self.f.write_fixed_size(" [")?; + unsafe { self.f.write_lt_8(regspec_label(&spec))?; } + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size(" ")?; - write!(self.f, "[{} * {} ", - regspec_label(&spec), - scale, - )?; let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; @@ -1598,7 +1598,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.write_fixed_size("+ 0x")?; } self.f.write_u32(v)?; - write!(self.f, "]")?; + self.f.write_char(']')?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { @@ -1613,11 +1613,11 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; + self.f.write_fixed_size(" [")?; + unsafe { self.f.write_lt_8(regspec_label(&base))?; } + self.f.write_fixed_size(" + ")?; + unsafe { self.f.write_lt_8(regspec_label(&index))?; } self.f.write_fixed_size(" ")?; - write!(self.f, "[{} + {} ", - regspec_label(&base), - regspec_label(&index), - )?; let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; @@ -1626,17 +1626,18 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.write_fixed_size("+ 0x")?; } self.f.write_u32(v)?; - write!(self.f, "]")?; + self.f.write_char(']')?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; - self.f.write_fixed_size(" ")?; - write!(self.f, "[{} + {} * {}]", - regspec_label(&base), - regspec_label(&index), - scale - )?; + self.f.write_fixed_size(" [")?; + unsafe { self.f.write_lt_8(regspec_label(&base))?; } + self.f.write_fixed_size(" + ")?; + unsafe { self.f.write_lt_8(regspec_label(&index))?; } + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size("]")?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { @@ -1655,7 +1656,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.write_fixed_size("+ 0x")?; } self.f.write_u32(v)?; - write!(self.f, "]")?; + self.f.write_char(']')?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } From cbdfa8402603e3a756b10d1527f1dc3c594f57cd Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 22:07:00 -0700 Subject: [PATCH 36/95] inline the write u8/u32 helpers, lets see what that does --- src/long_mode/display.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 4f80950..1d14ff8 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -740,6 +740,7 @@ impl DisplaySink for alloc::string::String { /// this is provided for optimization opportunities when the formatted integer can be written /// directly to the sink (rather than formatted to an intermediate buffer and output as a /// followup step) + #[inline(always)] fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
@@ -803,6 +804,7 @@ impl DisplaySink for alloc::string::String { /// this is provided for optimization opportunities when the formatted integer can be written /// directly to the sink (rather than formatted to an intermediate buffer and output as a /// followup step) + #[inline(always)] fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. @@ -1073,6 +1075,7 @@ impl DisplaySink for BigEnoughString { /// this is provided for optimization opportunities when the formatted integer can be written /// directly to the sink (rather than formatted to an intermediate buffer and output as a /// followup step) + #[inline(always)] fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. @@ -1133,6 +1136,7 @@ impl DisplaySink for BigEnoughString { /// this is provided for optimization opportunities when the formatted integer can be written /// directly to the sink (rather than formatted to an intermediate buffer and output as a /// followup step) + #[inline(always)] fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. From 89838f6bc67d0f7e8ae509a8bfa9d7e2ccc3137c Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 22:28:06 -0700 Subject: [PATCH 37/95] unreachable panics for impossible op_nr. clean this up though.. 
--- src/long_mode/display.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 1d14ff8..24d35be 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1402,6 +1402,9 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi fn visit_abs_u32(&mut self, imm: u32) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -1417,6 +1420,9 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi fn visit_abs_u64(&mut self, imm: u64) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -1428,6 +1434,9 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -1453,6 +1462,9 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi fn visit_deref(&mut self, reg: RegSpec) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; self.f.write_fixed_size(" ")?; + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -1466,6 +1478,9 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -1483,6 +1498,9 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -1510,6 +1528,9 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; self.f.write_fixed_size(" ")?; + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; @@ -1527,6 +1548,9 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; self.f.write_fixed_size(" ")?; + if self.op_nr >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); self.f.write_char(name[0] as char)?; From e904f613a29a5e3edfe20991b38356e77c857ba9 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 22:36:54 -0700 Subject: [PATCH 38/95] try grouping characters printed with or without segment prefixes --- src/long_mode/display.rs | 42 +++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 24d35be..355cbc7 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1433,17 +1433,18 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; - self.f.write_fixed_size(" ")?; if self.op_nr >= 4 { unsafe { core::hint::unreachable_unchecked(); } } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); + self.f.write_char(' ')?; self.f.write_char(name[0] as char)?; self.f.write_char(name[1] as char)?; - self.f.write_char(':')?; + self.f.write_fixed_size(":[")?; + } else { + self.f.write_fixed_size(" [")?; } - self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&reg))?; } self.f.write_fixed_size(" ")?; @@ -1461,33 +1462,35 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } fn visit_deref(&mut self, reg: RegSpec) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; - self.f.write_fixed_size(" ")?; if self.op_nr >= 4 { unsafe { core::hint::unreachable_unchecked(); } } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); + self.f.write_char(' ')?; self.f.write_char(name[0] as char)?; self.f.write_char(name[1] as char)?; - self.f.write_char(':')?; + self.f.write_fixed_size(":[")?; + } else { + self.f.write_fixed_size(" [")?; } - self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&reg))?; } self.f.write_fixed_size("]") } fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; - self.f.write_fixed_size(" ")?; if self.op_nr >= 4 { unsafe { core::hint::unreachable_unchecked(); } } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); + self.f.write_char(' ')?; self.f.write_char(name[0] as char)?; self.f.write_char(name[1] as char)?; - self.f.write_char(':')?; + self.f.write_fixed_size(":[")?; + } else { + self.f.write_fixed_size(" [")?; } - self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&reg))?; } self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc @@ -1497,17 +1500,18 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; - self.f.write_fixed_size(" ")?; if self.op_nr >= 4 { unsafe { core::hint::unreachable_unchecked(); } } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); + self.f.write_char(' ')?; self.f.write_char(name[0] as char)?; self.f.write_char(name[1] as char)?; - self.f.write_char(':')?; + self.f.write_fixed_size(":[")?; + } else { + self.f.write_fixed_size(" [")?; } - self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&reg))?; } self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc @@ -1527,17 +1531,18 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; - self.f.write_fixed_size(" ")?; if self.op_nr >= 4 { unsafe { core::hint::unreachable_unchecked(); } } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); + self.f.write_char(' ')?; self.f.write_char(name[0] as char)?; self.f.write_char(name[1] as char)?; - self.f.write_char(':')?; + self.f.write_fixed_size(":[")?; + } else { + self.f.write_fixed_size(" [")?; } - self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&base))?; } self.f.write_fixed_size(" + ")?; unsafe { self.f.write_lt_8(regspec_label(&index))?; } @@ -1547,17 +1552,18 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; - self.f.write_fixed_size(" ")?; if self.op_nr >= 4 { unsafe { core::hint::unreachable_unchecked(); } } if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { let name = prefix.name(); + self.f.write_char(' ')?; self.f.write_char(name[0] as char)?; self.f.write_char(name[1] as char)?; - self.f.write_char(':')?; + self.f.write_fixed_size(":[")?; + } else { + self.f.write_fixed_size(" [")?; } - self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&base))?; } self.f.write_fixed_size(" + ")?; unsafe { self.f.write_lt_8(regspec_label(&index))?; } From d91b2d1443c6ba4e850023032d4e93beab820fdc Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 22:50:22 -0700 Subject: [PATCH 39/95] more unused arguments --- src/long_mode/display.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 355cbc7..10ceb89 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -5802,12 +5802,12 @@ impl Instruction { } pub fn write_to(&self, out: &mut T) -> fmt::Result { - 
contextualize_intel(self, 0, Some(&NoContext), out) + contextualize_intel(self, out) // self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) } } -fn contextualize_intel(instr: &Instruction, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { +fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt::Result { if instr.xacquire() { out.write_fixed_size("xacquire ")?; } @@ -6260,7 +6260,7 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual }; let mut out = &mut out; - contextualize_intel(instr, address, context, out) + contextualize_intel(instr, out) } DisplayStyle::C => { contextualize_c(instr, address, context, out) From 1dc74628d428e2e65c59147900ea44a9db7ab163 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 23:02:33 -0700 Subject: [PATCH 40/95] make write_2 work again for comparison (kinda) --- src/long_mode/display.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 10ceb89..d6e03a9 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -5144,7 +5144,6 @@ impl Instruction { } } - /* let address: u64 = 0; let context = Some(&NoContext); let colors = &NoColors; @@ -5176,11 +5175,16 @@ impl Instruction { if self.operand_count > 0 { danger_anguished_string_write(out, " "); - if self.visit_operand(0, &mut RelativeBranchPrinter { - inst: &self, - colors, - out, - })? { + let rel_res = { + let out = unsafe { core::mem::transmute::<&mut alloc::string::String, &mut BigEnoughString>(out) }; + self.visit_operand(0, &mut RelativeBranchPrinter { + inst: &self, + out: &mut NoColorsSink { + out: out, + }, + })? 
+ }; + if rel_res { return Ok(()); } @@ -5797,7 +5801,6 @@ impl Instruction { } } } - */ Ok(()) } From cc6f7cabfdd7e48f71241ffc8e4860be6d26ba93 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 23:14:03 -0700 Subject: [PATCH 41/95] helpers for those i8/u8 immediates too --- src/long_mode/display.rs | 39 ++------------------------------------- 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index d6e03a9..77111b8 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1208,24 +1208,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi fn visit_u8(&mut self, imm: u8) -> Result { self.f.span_enter(TokenType::Immediate); self.f.write_fixed_size("0x")?; - let mut buf = [MaybeUninit::::uninit(); 2]; - let mut curr = buf.len(); - let mut v = imm; - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s: &str = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - self.f.write_fixed_size(s)?; + self.f.write_u8(imm)?; self.f.span_end(TokenType::Immediate); Ok(()) } @@ -1237,25 +1220,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi v = -imm as u8; } self.f.write_fixed_size("0x")?; - let mut buf = [core::mem::MaybeUninit::::uninit(); 2]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) - }; - - // not actually fixed size, but this should optimize right i hope.. 
- self.f.write_fixed_size(s)?; + self.f.write_u8(v)?; self.f.span_end(TokenType::Immediate); Ok(()) } From 6b0a7aa23f1f125213081cee1f37079e53f05882 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 18 Jun 2024 23:37:41 -0700 Subject: [PATCH 42/95] configurable inlining to help with opts --- Cargo.toml | 4 ++++ src/long_mode/display.rs | 3 ++- src/long_mode/mod.rs | 11 ++++++++--- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e8734cc..34f8af7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,3 +52,7 @@ colors = ["yaxpeax-arch/colors"] # This enables some capstone benchmarks over the same # instruction bytes used to bench this code. capstone_bench = [] + +# this disables a lot of inlining to make it easier for me to measure +# likelihood of codepaths for typical instruction sequences +profiling = [] diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 77111b8..d0aef26 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -384,7 +384,6 @@ pub trait DisplaySink: fmt::Write { /// utf-8 string. this may corrupt Rust strings. unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), core::fmt::Error> { self.write_str(s) - } /// write a string to this sink that is less than 16 bytes. this is provided for optimization /// opportunities when writing a variable-length string with known max size. 
@@ -5769,12 +5768,14 @@ impl Instruction { Ok(()) } + #[cfg_attr(feature="profiling", inline(never))] pub fn write_to(&self, out: &mut T) -> fmt::Result { contextualize_intel(self, out) // self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) } } +#[cfg_attr(feature="profiling", inline(never))] fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt::Result { if instr.xacquire() { out.write_fixed_size("xacquire ")?; diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index e6d0a02..5a5b89a 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -4383,6 +4383,7 @@ impl Instruction { /// if the work you expect to do per-operand is very small, constructing an `Operand` and /// dispatching on tags may be a substantial factor of overall runtime. `visit_operand` can /// reduce total overhead in such cases. + #[cfg_attr(features="profiling", inline(never))] fn visit_operand(&self, i: u8, visitor: &mut T) -> Result { assert!(i < 4); let spec = self.operands[i as usize]; @@ -6009,7 +6010,8 @@ const OPCODES: [OpcodeRecord; 256] = [ ]; #[allow(non_snake_case)] -#[inline(always)] +#[cfg_attr(feature="profiling", inline(never))] +#[cfg_attr(not(feature="profiling"), inline(always))] pub(self) fn read_E< T: Reader<::Address, ::Word>, S: DescriptionSink, @@ -6638,6 +6640,7 @@ impl DecodeCtx { self.rb_size } +#[cfg_attr(feature="profiling", inline(never))] fn read_opc_hotpath< T: Reader<::Address, ::Word>, S: DescriptionSink, @@ -6694,7 +6697,8 @@ fn read_opc_hotpath< } } -#[inline(always)] +#[cfg_attr(feature="profiling", inline(never))] +#[cfg_attr(not(feature="profiling"), inline(always))] fn read_with_annotations< T: Reader<::Address, ::Word>, S: DescriptionSink, @@ -6912,7 +6916,8 @@ fn read_avx_prefixed< return Ok(()); } -#[inline(always)] +#[cfg_attr(feature="profiling", inline(never))] +#[cfg_attr(not(feature="profiling"), inline(always))] fn read_operands< T: Reader<::Address, ::Word>, S: DescriptionSink From 
f6ad0a91226b12cb7ec928dbbb6983ea0425d9e2 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 19 Jun 2024 00:54:03 -0700 Subject: [PATCH 43/95] hint better about codegen for contextualize_intel --- src/long_mode/display.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index d0aef26..2404847 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -5826,6 +5826,9 @@ fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt: op_nr: i, f: out, }; + if i >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } instr.visit_operand(i as u8, &mut displayer)?; if let Some(evex) = instr.prefixes.evex() { let x = Operand::from_spec(instr, instr.operands[i as usize]); @@ -5873,7 +5876,15 @@ fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt: 0 } }; - write!(out, "{{1to{}}}", scale)?; + out.write_fixed_size("{1to")?; + static STRING_LUT: &'static [&'static str] = &[ + "0", "1", "2", "3", "4", "5", "6", "7", "8", + "9", "10", "11", "12", "13", "14", "15", "16", + ]; + unsafe { + out.write_lt_16(STRING_LUT.get_kinda_unchecked(scale as usize))?; + } + out.write_char('}')?; } } } From 85700ee8b91afcada27a9b4fffda498adf4573dc Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 19 Jun 2024 01:22:32 -0700 Subject: [PATCH 44/95] write_u64 helpers --- src/long_mode/display.rs | 116 ++++++++++++++++++++++++++------------- 1 file changed, 78 insertions(+), 38 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 2404847..d9f7f9e 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -428,6 +428,14 @@ pub trait DisplaySink: fmt::Write { fn write_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> { write!(self, "{:x}", v) } + /// write a u64 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> { + write!(self, "{:x}", v) + } // fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error>; fn span_enter(&mut self, ty: TokenType); fn span_end(&mut self, ty: TokenType); @@ -830,6 +838,38 @@ impl DisplaySink for alloc::string::String { Ok(()) } + /// write a u64 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = (((64 - v.leading_zeros()) >> 2) + 1) as usize; + self.reserve(printed_size); + + unsafe { + let buf = unsafe { self.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + p.offset(curr as isize).write(c); + v = v / 16; + if v == 0 { + break; + } + } + + buf.set_len(buf.len() + printed_size); + } + + Ok(()) + } fn span_enter(&mut self, ty: TokenType) {} fn span_end(&mut self, ty: TokenType) {} } @@ -1161,6 +1201,37 @@ impl DisplaySink for BigEnoughString { Ok(()) } + /// write a u64 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = (((64 - v.leading_zeros()) >> 2) + 1) as usize; + + unsafe { + let buf = unsafe { self.content.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + p.offset(curr as isize).write(c); + v = v / 16; + if v == 0 { + break; + } + } + + buf.set_len(buf.len() + printed_size); + } + + Ok(()) + } fn span_enter(&mut self, ty: TokenType) {} fn span_end(&mut self, ty: TokenType) {} } @@ -1264,24 +1335,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi fn visit_u64(&mut self, imm: u64) -> Result { self.f.span_enter(TokenType::Immediate); self.f.write_fixed_size("0x")?; - let mut buf = [MaybeUninit::::uninit(); 16]; - let mut curr = buf.len(); - let mut v = imm; - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s: &str = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - self.f.write_fixed_size(s)?; + self.f.write_u64(imm)?; self.f.span_end(TokenType::Immediate); Ok(()) } @@ -1293,25 +1347,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi v = -imm as u64; } self.f.write_fixed_size("0x")?; - let mut buf = [core::mem::MaybeUninit::::uninit(); 64]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - 
} - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[core::mem::MaybeUninit], &str>(buf) - }; - - // not actually fixed size, but this should optimize right i hope.. - self.f.write_fixed_size(s)?; + self.f.write_u64(v)?; self.f.span_end(TokenType::Immediate); Ok(()) } @@ -1393,7 +1429,11 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.write_char(name[1] as char)?; self.f.write_char(':')?; } - write!(self.f, "[{}]", u64_hex(imm)) + self.f.write_fixed_size("[")?; + self.f.write_fixed_size("0x")?; + self.f.write_u64(imm)?; + self.f.write_fixed_size("]")?; + Ok(()) } fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; From 49e910b50066161fcf581c4aec775655f85cffe3 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 19 Jun 2024 10:22:03 -0700 Subject: [PATCH 45/95] more profiling outlining --- src/long_mode/display.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index d9f7f9e..a60b2fd 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1275,6 +1275,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi type Ok = (); type Error = core::fmt::Error; + #[cfg_attr(feature="profiling", inline(never))] fn visit_u8(&mut self, imm: u8) -> Result { self.f.span_enter(TokenType::Immediate); self.f.write_fixed_size("0x")?; @@ -1282,6 +1283,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.span_end(TokenType::Immediate); Ok(()) } + #[cfg_attr(feature="profiling", inline(never))] fn visit_i8(&mut self, imm: i8) -> Result { self.f.span_enter(TokenType::Immediate); let mut v = imm as u8; @@ -1294,6 +1296,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.span_end(TokenType::Immediate); Ok(()) } + #[cfg_attr(feature="profiling", inline(never))] fn visit_u16(&mut self, imm: u16) -> Result { 
self.f.span_enter(TokenType::Immediate); self.f.write_fixed_size("0x")?; @@ -1301,6 +1304,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.span_end(TokenType::Immediate); Ok(()) } + #[cfg_attr(feature="profiling", inline(never))] fn visit_i16(&mut self, imm: i16) -> Result { self.f.span_enter(TokenType::Immediate); let mut v = imm as u16; @@ -1313,6 +1317,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.span_end(TokenType::Immediate); Ok(()) } + #[cfg_attr(feature="profiling", inline(never))] fn visit_u32(&mut self, imm: u32) -> Result { self.f.span_enter(TokenType::Immediate); self.f.write_fixed_size("0x")?; @@ -1332,6 +1337,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.span_end(TokenType::Immediate); Ok(()) } + #[cfg_attr(feature="profiling", inline(never))] fn visit_u64(&mut self, imm: u64) -> Result { self.f.span_enter(TokenType::Immediate); self.f.write_fixed_size("0x")?; @@ -1339,6 +1345,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.span_end(TokenType::Immediate); Ok(()) } + #[cfg_attr(feature="profiling", inline(never))] fn visit_i64(&mut self, imm: i64) -> Result { self.f.span_enter(TokenType::Immediate); let mut v = imm as u64; @@ -1351,6 +1358,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.span_end(TokenType::Immediate); Ok(()) } + #[cfg_attr(feature="profiling", inline(never))] fn visit_reg(&mut self, reg: RegSpec) -> Result { self.f.span_enter(TokenType::Register); unsafe { self.f.write_lt_8(regspec_label(®))?; } From 55548603b88ccf0babd081bd636c73ca164da919 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 19 Jun 2024 10:46:49 -0700 Subject: [PATCH 46/95] hoist set_len calls to have fewer live values --- src/long_mode/display.rs | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index a60b2fd..0188361 100644 
--- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -555,6 +555,10 @@ impl DisplaySink for alloc::string::String { let mut rem = new_bytes.len() as isize; + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + core::arch::asm!( "6:", "cmp {rem:e}, 16", @@ -598,8 +602,6 @@ impl DisplaySink for alloc::string::String { buf = out(reg) _, options(nostack), ); - - buf.set_len(buf.len() + new_bytes.len()); } /* for i in 0..new_bytes.len() { @@ -633,6 +635,10 @@ impl DisplaySink for alloc::string::String { let mut rem = new_bytes.len() as isize; + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + core::arch::asm!( "7:", "cmp {rem:e}, 8", @@ -667,8 +673,6 @@ impl DisplaySink for alloc::string::String { buf = out(reg) _, options(nostack), ); - - buf.set_len(buf.len() + new_bytes.len()); } /* for i in 0..new_bytes.len() { @@ -702,6 +706,10 @@ impl DisplaySink for alloc::string::String { let mut rem = new_bytes.len() as isize; + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + core::arch::asm!( "8:", "cmp {rem:e}, 4", @@ -729,8 +737,6 @@ impl DisplaySink for alloc::string::String { buf = out(reg) _, options(nostack), ); - - buf.set_len(buf.len() + new_bytes.len()); } /* for i in 0..new_bytes.len() { @@ -926,6 +932,10 @@ impl DisplaySink for BigEnoughString { let mut rem = new_bytes.len() as isize; + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + core::arch::asm!( "6:", "cmp {rem:e}, 16", @@ -969,8 +979,6 @@ impl DisplaySink for BigEnoughString { buf = out(reg) _, options(nostack), ); - - buf.set_len(buf.len() + 
new_bytes.len()); } /* for i in 0..new_bytes.len() { @@ -1002,6 +1010,10 @@ impl DisplaySink for BigEnoughString { let mut rem = new_bytes.len() as isize; + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + core::arch::asm!( "7:", "cmp {rem:e}, 8", @@ -1036,8 +1048,6 @@ impl DisplaySink for BigEnoughString { buf = out(reg) _, options(nostack), ); - - buf.set_len(buf.len() + new_bytes.len()); } /* for i in 0..new_bytes.len() { @@ -1069,6 +1079,10 @@ impl DisplaySink for BigEnoughString { let mut rem = new_bytes.len() as isize; + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + core::arch::asm!( "8:", "cmp {rem:e}, 4", @@ -1096,8 +1110,6 @@ impl DisplaySink for BigEnoughString { buf = out(reg) _, options(nostack), ); - - buf.set_len(buf.len() + new_bytes.len()); } /* for i in 0..new_bytes.len() { From 2e7bdee5a9c8a2d182ccbd643e5faf2fdf6442b7 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 19 Jun 2024 11:07:03 -0700 Subject: [PATCH 47/95] write_2 did its job, seem to have reaped all that can be reaped --- src/long_mode/display.rs | 1098 -------------------------------------- 1 file changed, 1098 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 0188361..84b0894 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1281,8 +1281,6 @@ struct ColorizingOperandVisitor<'a, T> { f: &'a mut T, } -use core::mem::MaybeUninit; - impl crate::long_mode::OperandVisitor for ColorizingOperandVisitor<'_, T> { type Ok = (); type Error = core::fmt::Error; @@ -4732,1102 +4730,6 @@ fn c_to_hex(c: u8) -> u8 { } impl Instruction { -// pub fn write_2>(&self, out: &mut alloc::string::String) -> fmt::Result { - #[cfg_attr(feature="profiling", inline(never))] - pub fn write_2(&self, out: &mut 
alloc::string::String) -> fmt::Result { - use core::fmt::Write; - - fn anguished_string_write(out: &mut alloc::string::String, label: &str) { - let new_bytes = label.as_bytes(); - let buf = unsafe { out.as_mut_vec() }; - anguished_bstring_write(buf, new_bytes); - } - fn anguished_bstring_write(buf: &mut alloc::vec::Vec, new_bytes: &[u8]) { - if new_bytes.len() >= 32 { - unsafe { core::hint::unreachable_unchecked() } - } - buf.reserve(new_bytes.len()); - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - unsafe { - buf.set_len(buf.len() + new_bytes.len()); - } - } - - fn danger_anguished_string_write(out: &mut alloc::string::String, label: &str) { - let new_bytes = label.as_bytes(); - let buf = unsafe { out.as_mut_vec() }; - danger_anguished_bstring_write(buf, new_bytes); - } - fn danger_anguished_bstring_write(buf: &mut alloc::vec::Vec, new_bytes: &[u8]) { - if new_bytes.len() >= 16 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let mut rem = new_bytes.len() as isize; - unsafe { - buf.set_len(buf.len() + new_bytes.len()); - } - /* - while rem % 4 > 0 { - dest.offset(rem - 1).write_unaligned(src.offset(rem - 1).read_unaligned()); - rem -= 1; - } - - while rem > 0 { - (dest.offset(rem - 4) as *mut u32).write_unaligned(unsafe { - *core::mem::transmute::<&u8, &u32>(&new_bytes[rem as usize - 4]) - }); - rem -= 4; - } - */ - /* - unsafe { - core::arch::asm!( - "7:", - "cmp {rem}, 4", - "jb 8f", - "sub {rem}, 4", - "mov {buf:e}, dword ptr [{src} + {rem}]", - "mov dword ptr [{dest} + {rem}], {buf:e}", - "jmp 7b", - "8:", - "test {rem}, {rem}", - "jz 10f", - "sub {rem}, 1", - "mov {buf:l}, byte ptr [{src} + {rem}]", - "mov byte ptr [{dest} + {rem}], {buf:l}", - "jnz 8b", - "10:", - src = in(reg) src, - dest = in(reg) dest, - rem = in(reg) rem, -// tmp = out(reg) _, - 
buf = out(reg) _, - options(nostack), - ); - } - */ - /* - unsafe { - core::arch::asm!( - "mov {tmp}, {rem}", - "and {tmp}, 3", - "je 3f", - "sub {rem}, {tmp}", - "2:", - "mov {buf:l}, byte ptr [{src}]", - "mov byte ptr [{dest}], {buf:l}", - "add {src}, 1", - "add {dest}, 1", - "sub {tmp}, 1", - "jnz 2b", - "3:", - "test {rem}, {rem}", - "jz 5f", - "4:", - "sub {rem}, 4", - "mov {buf:e}, dword ptr [{src} + {rem}]", - "mov dword ptr [{dest} + {rem}], {buf:e}", - "jnz 4b", - "5:", - src = in(reg) src, - dest = in(reg) dest, - rem = in(reg) rem, - tmp = out(reg) _, - buf = out(reg) _, - ); - } - */ - /* - */ - dest.offset(0 as isize).write(new_bytes[0]); - for i in 1..new_bytes.len() { - unsafe { - dest.offset(i as isize).write(new_bytes[i]); - } - } - // } - } - } - - fn danger_anguished_variable_length_string_write(out: &mut alloc::string::String, label: &str) { - let new_bytes = label.as_bytes(); - let buf = unsafe { out.as_mut_vec() }; - danger_anguished_variable_length_bstring_write(buf, new_bytes); - } - #[cfg_attr(feature="profiling", inline(never))] - fn danger_anguished_variable_length_bstring_write(buf: &mut alloc::vec::Vec, new_bytes: &[u8]) { - if new_bytes.len() >= 16 { - unsafe { core::hint::unreachable_unchecked() } - } - if new_bytes.len() == 0 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let mut rem = new_bytes.len() as isize; - unsafe { - buf.set_len(buf.len() + new_bytes.len()); - } - /* - while rem % 4 > 0 { - dest.offset(rem - 1).write_unaligned(src.offset(rem - 1).read_unaligned()); - rem -= 1; - } - - while rem > 0 { - (dest.offset(rem - 4) as *mut u32).write_unaligned(unsafe { - *core::mem::transmute::<&u8, &u32>(&new_bytes[rem as usize - 4]) - }); - rem -= 4; - } - */ - unsafe { - /* - if rem >= 8 { - rem -= 8; - (dest.offset(rem) as *mut u64).write_unaligned((src.offset(rem) as *const u64).read_unaligned()) - } - if rem >= 4 { 
- rem -= 4; - (dest.offset(rem) as *mut u32).write_unaligned((src.offset(rem) as *const u32).read_unaligned()); - if rem == 0 { - return; - } - } - if rem >= 2 { - rem -= 2; - (dest.offset(rem) as *mut u16).write_unaligned((src.offset(rem) as *const u16).read_unaligned()); - if rem == 0 { - return; - } - } - if rem >= 1 { - rem -= 1; - (dest.offset(rem) as *mut u8).write_unaligned((src.offset(rem) as *const u8).read_unaligned()) - } - */ - core::arch::asm!( - "7:", - "cmp {rem:e}, 8", - "jb 8f", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 8", - "jz 11f", - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + {rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, -// tmp = out(reg) _, - buf = out(reg) _, - options(nostack), - ); - } - /* - unsafe { - core::arch::asm!( - "7:", - "cmp {rem:e}, 4", - "jb 8f", - "sub {rem:e}, 4", - "mov {buf:e}, dword ptr [{src} + {rem}]", - "mov dword ptr [{dest} + {rem}], {buf:e}", - "jmp 7b", - "8:", - "test {rem:e}, {rem:e}", - "jz 10f", - "sub {rem:e}, 1", - "mov {buf:l}, byte ptr [{src} + {rem}]", - "mov byte ptr [{dest} + {rem}], {buf:l}", - "jnz 8b", - "10:", - src = in(reg) src, - dest = in(reg) dest, - rem = in(reg) rem, -// tmp = out(reg) _, - buf = out(reg) _, - options(nostack), - ); - } - */ - /* - unsafe { - core::arch::asm!( - "mov {tmp}, {rem}", - "and {tmp}, 3", - "je 3f", - "sub {rem}, {tmp}", - "2:", - "mov {buf:l}, byte ptr [{src}]", - "mov byte ptr [{dest}], {buf:l}", - "add {src}, 1", - "add {dest}, 1", - "sub 
{tmp}, 1", - "jnz 2b", - "3:", - "test {rem}, {rem}", - "jz 5f", - "4:", - "sub {rem}, 4", - "mov {buf:e}, dword ptr [{src} + {rem}]", - "mov dword ptr [{dest} + {rem}], {buf:e}", - "jnz 4b", - "5:", - src = in(reg) src, - dest = in(reg) dest, - rem = in(reg) rem, - tmp = out(reg) _, - buf = out(reg) _, - ); - } - */ - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - } - } - fn danger_anguished_smaller_variable_length_bstring_write(buf: &mut alloc::vec::Vec, new_bytes: &[u8]) { - if new_bytes.len() >= 8 { - unsafe { core::hint::unreachable_unchecked() } - } - if new_bytes.len() == 0 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let mut rem = new_bytes.len() as isize; - unsafe { - buf.set_len(buf.len() + new_bytes.len()); - } - /* - while rem % 4 > 0 { - dest.offset(rem - 1).write_unaligned(src.offset(rem - 1).read_unaligned()); - rem -= 1; - } - - while rem > 0 { - (dest.offset(rem - 4) as *mut u32).write_unaligned(unsafe { - *core::mem::transmute::<&u8, &u32>(&new_bytes[rem as usize - 4]) - }); - rem -= 4; - } - */ - unsafe { - /* - if rem >= 4 { - rem -= 4; - (dest.offset(rem as isize) as *mut u32).write_unaligned((src.offset(rem as isize) as *const u32).read_unaligned()); - if rem == 0 { - return; - } - } - if rem >= 2 { - rem -= 2; - (dest.offset(rem as isize) as *mut u16).write_unaligned((src.offset(rem as isize) as *const u16).read_unaligned()); - if rem == 0 { - return; - } - } - if rem >= 1 { - rem -= 1; - (dest.offset(rem as isize) as *mut u8).write_unaligned((src.offset(rem as isize) as *const u8).read_unaligned()) - } - */ - core::arch::asm!( - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", 
- "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + {rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, -// tmp = out(reg) _, - buf = out(reg) _, - options(nostack), - ); - } - /* - unsafe { - core::arch::asm!( - "7:", - "cmp {rem:e}, 4", - "jb 8f", - "sub {rem:e}, 4", - "mov {buf:e}, dword ptr [{src} + {rem}]", - "mov dword ptr [{dest} + {rem}], {buf:e}", - "jmp 7b", - "8:", - "test {rem:e}, {rem:e}", - "jz 10f", - "sub {rem:e}, 1", - "mov {buf:l}, byte ptr [{src} + {rem}]", - "mov byte ptr [{dest} + {rem}], {buf:l}", - "jnz 8b", - "10:", - src = in(reg) src, - dest = in(reg) dest, - rem = in(reg) rem, -// tmp = out(reg) _, - buf = out(reg) _, - options(nostack), - ); - } - */ - /* - unsafe { - core::arch::asm!( - "mov {tmp}, {rem}", - "and {tmp}, 3", - "je 3f", - "sub {rem}, {tmp}", - "2:", - "mov {buf:l}, byte ptr [{src}]", - "mov byte ptr [{dest}], {buf:l}", - "add {src}, 1", - "add {dest}, 1", - "sub {tmp}, 1", - "jnz 2b", - "3:", - "test {rem}, {rem}", - "jz 5f", - "4:", - "sub {rem}, 4", - "mov {buf:e}, dword ptr [{src} + {rem}]", - "mov dword ptr [{dest} + {rem}], {buf:e}", - "jnz 4b", - "5:", - src = in(reg) src, - dest = in(reg) dest, - rem = in(reg) rem, - tmp = out(reg) _, - buf = out(reg) _, - ); - } - */ - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - } - } - - let address: u64 = 0; - let context = Some(&NoContext); - let colors = &NoColors; - if self.prefixes.rep_any() { - if self.xacquire() { - danger_anguished_string_write(out, "xacquire "); - } - if self.xrelease() { - danger_anguished_string_write(out, "xrelease "); - } - - if self.opcode.can_rep() { - if self.prefixes.rep() { - 
danger_anguished_string_write(out, "rep "); - } else if self.prefixes.repnz() { - danger_anguished_string_write(out, "repnz "); - } - } - } - - if self.prefixes.lock() { - danger_anguished_string_write(out, "lock "); - } - - use core::mem::MaybeUninit; - - danger_anguished_variable_length_string_write(out, self.opcode.name()); - - if self.operand_count > 0 { - danger_anguished_string_write(out, " "); - - let rel_res = { - let out = unsafe { core::mem::transmute::<&mut alloc::string::String, &mut BigEnoughString>(out) }; - self.visit_operand(0, &mut RelativeBranchPrinter { - inst: &self, - out: &mut NoColorsSink { - out: out, - }, - })? - }; - if rel_res { - return Ok(()); - } - - fn display_op(inst: &Instruction, op_nr: u8, colors: &Y, out: &mut alloc::string::String) -> fmt::Result { - struct OperandPrinter<'a, Y: YaxColors> { - out: &'a mut alloc::string::String, - op_nr: u8, - colors: &'a Y, - inst: &'a Instruction, - } - - impl<'a, Y: YaxColors> crate::long_mode::OperandVisitor for OperandPrinter<'a, Y> { - type Ok = (); - type Error = fmt::Error; - - #[cfg_attr(feature="profiling", inline(never))] - fn visit_reg(&mut self, reg: RegSpec) -> Result { - let label = regspec_label(®); - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); -// danger_anguished_variable_length_string_write(self.out, label); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_deref(&mut self, reg: RegSpec) -> Result { - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); -// self.out.write_str(" ")?; - - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.inst.segment_override_for_op(self.op_nr) { - danger_anguished_string_write(self.out, " "); - danger_anguished_bstring_write(unsafe{self.out.as_mut_vec()}, prefix.name()); -// self.out.write_str(":")?; - 
danger_anguished_string_write(self.out, ":["); - } else { -// self.out.write_str("[")?; - danger_anguished_string_write(self.out, " ["); - } - let label = regspec_label(®); - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); -// self.out.write_str("]") - danger_anguished_string_write(self.out, "]"); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); - - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.inst.segment_override_for_op(self.op_nr) { - danger_anguished_string_write(self.out, " "); - danger_anguished_bstring_write(unsafe{self.out.as_mut_vec()}, prefix.name()); - danger_anguished_string_write(self.out, ":["); - } else { - danger_anguished_string_write(self.out, " ["); - } - let label = regspec_label(®); - if label.len() < 2 { - unsafe { core::hint::unreachable_unchecked(); } - } - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); - // write!(self.out, "{}", self.colors.number(signed_i32_hex(disp)))?; - let mut v = disp as u32; - if disp < 0 { - danger_anguished_string_write(self.out, " - 0x"); - v = -disp as u32; - } else { - danger_anguished_string_write(self.out, " + 0x"); - } - if v == 0 { - danger_anguished_string_write(self.out, "0"); - } else { - let lzcnt = v.leading_zeros(); - let mut digits = 8 - (lzcnt/8); - while digits > 0 { - let digit = (v >> (digits * 8)) & 0xf; - let c = c_to_hex(digit as u8); - danger_anguished_bstring_write(unsafe {self.out.as_mut_vec()}, &[c]); - digits -= 1; - } - } - /* - let mut buf = [MaybeUninit::::uninit(); 8]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - static CHARSET: &'static [u8; 16] = b"0123456789abcdef"; - let c = 
CHARSET[digit as usize]; - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - danger_anguished_string_write(&mut self.out, s); - */ - danger_anguished_string_write(&mut self.out, "]"); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_i8(&mut self, imm: i8) -> Result { - let mut v = imm as u8; - if imm < 0 { - danger_anguished_string_write(&mut self.out, "-"); - v = -imm as u8; - } - danger_anguished_string_write(&mut self.out, "0x"); - let mut buf = [MaybeUninit::::uninit(); 2]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s: &str = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - danger_anguished_string_write(&mut self.out, s); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_u8(&mut self, imm: u8) -> Result { - let mut v = imm as u8; - danger_anguished_string_write(&mut self.out, "0x"); - let mut buf = [MaybeUninit::::uninit(); 2]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s: &str = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - danger_anguished_string_write(&mut self.out, s); - Ok(()) - - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_i16(&mut self, imm: i16) -> Result { - let mut v = imm as u16; - if imm < 0 { - danger_anguished_string_write(&mut self.out, "-"); - v = -imm as u16; - } - danger_anguished_string_write(&mut self.out, "0x"); - let mut buf = [MaybeUninit::::uninit(); 4]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = 
v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s: &str = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - anguished_string_write(&mut self.out, s); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_u16(&mut self, imm: u16) -> Result { - let mut v = imm as u32; - danger_anguished_string_write(&mut self.out, "0x"); - let mut buf = [MaybeUninit::::uninit(); 4]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - anguished_string_write(&mut self.out, s); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_i32(&mut self, imm: i32) -> Result { - let mut v = imm as u32; - if imm < 0 { - danger_anguished_string_write(&mut self.out, "-"); - v = -imm as u32; - } - danger_anguished_string_write(&mut self.out, "0x"); - let mut buf = [MaybeUninit::::uninit(); 8]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - // danger_anguished_string_write(&mut self.out, s); - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_u32(&mut self, imm: u32) -> Result { - let mut v = imm as u32; - danger_anguished_string_write(&mut self.out, "0x"); - let mut buf = [MaybeUninit::::uninit(); 8]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - 
core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - //danger_anguished_string_write(&mut self.out, s); - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_i64(&mut self, imm: i64) -> Result { - let mut v = imm as u32; - if imm < 0 { - danger_anguished_string_write(&mut self.out, "-"); - v = -imm as u32; - } - danger_anguished_string_write(&mut self.out, "0x"); - let mut buf = [MaybeUninit::::uninit(); 16]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - danger_anguished_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); - Ok(()) - - - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_u64(&mut self, imm: u64) -> Result { - let mut v = imm as u64; - danger_anguished_string_write(&mut self.out, "0x"); - let mut buf = [MaybeUninit::::uninit(); 16]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - danger_anguished_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_abs_u32(&mut self, imm: u32) -> Result { - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); - danger_anguished_string_write(self.out, " [0x"); - let mut v = imm as u32; - let mut buf = [MaybeUninit::::uninit(); 16]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); 
- curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - // anguished_string_write(&mut self.out, s); - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); - danger_anguished_string_write(self.out, "]"); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_abs_u64(&mut self, imm: u64) -> Result { - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); - danger_anguished_string_write(self.out, " [0x"); - let mut v = imm as u64; - let mut buf = [MaybeUninit::::uninit(); 16]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - // anguished_string_write(&mut self.out, s); - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); - danger_anguished_string_write(self.out, "]"); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); - danger_anguished_string_write(self.out, " "); - - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.inst.segment_override_for_op(self.op_nr) { - danger_anguished_bstring_write(unsafe{self.out.as_mut_vec()}, prefix.name()); - danger_anguished_string_write(self.out, ":"); - } - danger_anguished_string_write(self.out, "["); - let label = regspec_label(®); - danger_anguished_smaller_variable_length_bstring_write(unsafe { 
self.out.as_mut_vec() }, label.as_bytes()); - danger_anguished_string_write(self.out, " * "); - danger_anguished_bstring_write(unsafe { self.out.as_mut_vec() }, &[scale + b'0']); - danger_anguished_string_write(self.out, "]"); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); - danger_anguished_string_write(self.out, " "); - - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.inst.segment_override_for_op(self.op_nr) { - danger_anguished_bstring_write(unsafe{self.out.as_mut_vec()}, prefix.name()); - danger_anguished_string_write(self.out, ":"); - } - danger_anguished_string_write(self.out, "["); - let label = regspec_label(&base); - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); - danger_anguished_string_write(self.out, " + "); - let label = regspec_label(&index); - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); - danger_anguished_string_write(self.out, " * "); - danger_anguished_bstring_write(unsafe { self.out.as_mut_vec() }, &[scale + b'0']); - danger_anguished_string_write(self.out, "]"); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); - danger_anguished_string_write(self.out, " "); - - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.inst.segment_override_for_op(self.op_nr) { - danger_anguished_bstring_write(unsafe{self.out.as_mut_vec()}, prefix.name()); - 
danger_anguished_string_write(self.out, ":"); - } - danger_anguished_string_write(self.out, "["); - let label = regspec_label(®); - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); - danger_anguished_string_write(self.out, " * "); - danger_anguished_bstring_write(unsafe { self.out.as_mut_vec() }, &[scale + b'0']); - let mut v = disp as u32; - if disp < 0 { - danger_anguished_string_write(self.out, " - "); - v = -disp as u32; - } else { - danger_anguished_string_write(self.out, " + "); - } - danger_anguished_string_write(self.out, "0x"); - let mut buf = [MaybeUninit::::uninit(); 8]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - // anguished_string_write(&mut self.out, s); - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); - danger_anguished_string_write(self.out, "]"); - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] - fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, mem_size_label(self.inst.mem_size).as_bytes()); - danger_anguished_string_write(self.out, " "); - - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.inst.segment_override_for_op(self.op_nr) { - danger_anguished_bstring_write(unsafe{self.out.as_mut_vec()}, prefix.name()); - danger_anguished_string_write(self.out, ":"); - } - danger_anguished_string_write(self.out, "["); - let label = regspec_label(&base); - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); - 
danger_anguished_string_write(self.out, " + "); - let label = regspec_label(&index); - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, label.as_bytes()); - danger_anguished_string_write(self.out, " * "); - danger_anguished_bstring_write(unsafe { self.out.as_mut_vec() }, &[scale + b'0']); - let mut v = disp as u32; - if disp < 0 { - danger_anguished_string_write(self.out, " - "); - v = -disp as u32; - } else { - danger_anguished_string_write(self.out, " + "); - } - danger_anguished_string_write(self.out, "0x"); - let mut buf = [MaybeUninit::::uninit(); 8]; - let mut curr = buf.len(); - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; - buf[curr].write(c); - v = v / 16; - if v == 0 { - break; - } - } - let buf = &buf[curr..]; - let s = unsafe { - core::mem::transmute::<&[MaybeUninit], &str>(buf) - }; - danger_anguished_smaller_variable_length_bstring_write(unsafe { self.out.as_mut_vec() }, s.as_bytes()); - danger_anguished_string_write(self.out, "]"); - Ok(()) - } - fn visit_other(&mut self) -> Result { - Ok(()) - } - fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { - Ok(()) - } - fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: crate::long_mode::SaeMode) -> Result { - Ok(()) - } - fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { - Ok(()) - } - fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { - Ok(()) - } - fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { - Ok(()) - } - fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { - Ok(()) - } - fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { - Ok(()) - } - fn visit_index_base_masked(&mut self, base: RegSpec, index: 
RegSpec, mask_reg: RegSpec) -> Result { - Ok(()) - } - fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { - Ok(()) - } - fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { - Ok(()) - } - fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { - Ok(()) - } - } - - let mut printer = OperandPrinter { - out, - inst, - op_nr, - colors, - }; - inst.visit_operand(op_nr, &mut printer) - } - - display_op(self, 0, colors, out)?; - - for i in 1..self.operand_count { - match self.opcode { - _ => { - match &self.operands[i as usize] { - &OperandSpec::Nothing => { - // should never see a Nothing if we iterate only through - // `operand_count`.. - unsafe { crate::long_mode::unreachable_unchecked() } - }, - _ => { - danger_anguished_string_write(out, ", "); - display_op(self, i, colors, out)?; - if let Some(evex) = self.prefixes.evex() { - if evex.broadcast() && false { // x.is_memory() { - let scale = if self.opcode == Opcode::VCVTPD2PS || self.opcode == Opcode::VCVTTPD2UDQ || self.opcode == Opcode::VCVTPD2UDQ || self.opcode == Opcode::VCVTUDQ2PD || self.opcode == Opcode::VCVTPS2PD || self.opcode == Opcode::VCVTQQ2PS || self.opcode == Opcode::VCVTDQ2PD || self.opcode == Opcode::VCVTTPD2DQ || self.opcode == Opcode::VFPCLASSPS || self.opcode == Opcode::VFPCLASSPD || self.opcode == Opcode::VCVTNEPS2BF16 || self.opcode == Opcode::VCVTUQQ2PS || self.opcode == Opcode::VCVTPD2DQ || self.opcode == Opcode::VCVTTPS2UQQ || self.opcode == Opcode::VCVTPS2UQQ || self.opcode == Opcode::VCVTTPS2QQ || self.opcode == Opcode::VCVTPS2QQ { - if self.opcode == Opcode::VFPCLASSPS || self.opcode == Opcode::VCVTNEPS2BF16 { - if evex.vex().l() { - 8 - } else if evex.lp() { - 16 - } else { - 4 - } - } else if self.opcode == Opcode::VFPCLASSPD { - if evex.vex().l() { - 4 - } else if evex.lp() { - 8 - } 
else { - 2 - } - } else { - // vcvtpd2ps is "cool": in broadcast mode, it can read a - // double-precision float (qword), resize to single-precision, - // then broadcast that to the whole destination register. this - // means we need to show `xmm, qword [addr]{1to4}` if vector - // size is 256. likewise, scale of 8 for the same truncation - // reason if vector size is 512. - // vcvtudq2pd is the same story. - // vfpclassp{s,d} is a mystery to me. - if evex.vex().l() { - 4 - } else if evex.lp() { - 8 - } else { - 2 - } - } - } else { - // this should never be `None` - that would imply two - // memory operands for a broadcasted operation. - if let Some(width) = Operand::from_spec(self, self.operands[i as usize - 1]).width() { - width / self.mem_size - } else { - 0 - } - }; - write!(out, "{{1to{}}}", scale)?; - } - } - } - } - } - } - } - } - Ok(()) - } - #[cfg_attr(feature="profiling", inline(never))] pub fn write_to(&self, out: &mut T) -> fmt::Result { contextualize_intel(self, out) From 1b65f5bc6320cfbbcf4f0b264c71270304973a84 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 19 Jun 2024 11:19:39 -0700 Subject: [PATCH 48/95] clean up warnings, scope unsafe blocks better --- src/long_mode/display.rs | 254 +++++++++++++++++++++------------------ 1 file changed, 135 insertions(+), 119 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 84b0894..8138ab1 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -517,15 +517,16 @@ impl DisplaySink for alloc::string::String { unsafe { let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let mut rem = new_bytes.len() as isize; + // this used to be enough to bamboozle llvm away from + // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 + // if `s` is not fixed size. 
somewhere between Rust 1.68 and Rust 1.74 this stopped + // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` + // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this + // unrolls into some kind of appropriate series of `mov`. dest.offset(0 as isize).write(new_bytes[0]); for i in 1..new_bytes.len() { - unsafe { - dest.offset(i as isize).write(new_bytes[i]); - } + dest.offset(i as isize).write(new_bytes[i]); } buf.set_len(buf.len() + new_bytes.len()); @@ -553,7 +554,7 @@ impl DisplaySink for alloc::string::String { let dest = buf.as_mut_ptr().offset(buf.len() as isize); let src = new_bytes.as_ptr(); - let mut rem = new_bytes.len() as isize; + let rem = new_bytes.len() as isize; // set_len early because there is no way to avoid the following asm!() writing that // same number of bytes into buf @@ -633,7 +634,7 @@ impl DisplaySink for alloc::string::String { let dest = buf.as_mut_ptr().offset(buf.len() as isize); let src = new_bytes.as_ptr(); - let mut rem = new_bytes.len() as isize; + let rem = new_bytes.len() as isize; // set_len early because there is no way to avoid the following asm!() writing that // same number of bytes into buf @@ -704,7 +705,7 @@ impl DisplaySink for alloc::string::String { let dest = buf.as_mut_ptr().offset(buf.len() as isize); let src = new_bytes.as_ptr(); - let mut rem = new_bytes.len() as isize; + let rem = new_bytes.len() as isize; // set_len early because there is no way to avoid the following asm!() writing that // same number of bytes into buf @@ -760,21 +761,23 @@ impl DisplaySink for alloc::string::String { let printed_size = (((8 - v.leading_zeros()) >> 2) + 1) as usize; self.reserve(printed_size); - unsafe { - let buf = unsafe { self.as_mut_vec() }; - let p = buf.as_mut_ptr(); - let mut curr = printed_size; - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; + let buf = unsafe { self.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = 
printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + unsafe { p.offset(curr as isize).write(c); - v = v / 16; - if v == 0 { - break; - } } + v = v / 16; + if v == 0 { + break; + } + } + unsafe { buf.set_len(buf.len() + printed_size); } @@ -791,21 +794,23 @@ impl DisplaySink for alloc::string::String { let printed_size = (((16 - v.leading_zeros()) >> 2) + 1) as usize; self.reserve(printed_size); - unsafe { - let buf = unsafe { self.as_mut_vec() }; - let p = buf.as_mut_ptr(); - let mut curr = printed_size; - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; + let buf = unsafe { self.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + unsafe { p.offset(curr as isize).write(c); - v = v / 16; - if v == 0 { - break; - } } + v = v / 16; + if v == 0 { + break; + } + } + unsafe { buf.set_len(buf.len() + printed_size); } @@ -824,21 +829,23 @@ impl DisplaySink for alloc::string::String { let printed_size = (((32 - v.leading_zeros()) >> 2) + 1) as usize; self.reserve(printed_size); - unsafe { - let buf = unsafe { self.as_mut_vec() }; - let p = buf.as_mut_ptr(); - let mut curr = printed_size; - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; + let buf = unsafe { self.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + unsafe { p.offset(curr as isize).write(c); - v = v / 16; - if v == 0 { - break; - } } + v = v / 16; + if v == 0 { + break; + } + } + unsafe { buf.set_len(buf.len() + printed_size); } @@ -856,21 +863,23 @@ impl DisplaySink for alloc::string::String { let printed_size = (((64 - v.leading_zeros()) >> 2) + 1) as usize; self.reserve(printed_size); - unsafe { - let buf = unsafe { self.as_mut_vec() }; - let p = buf.as_mut_ptr(); - let mut curr = printed_size; - loop { - let digit 
= v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; + let buf = unsafe { self.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + unsafe { p.offset(curr as isize).write(c); - v = v / 16; - if v == 0 { - break; - } } + v = v / 16; + if v == 0 { + break; + } + } + unsafe { buf.set_len(buf.len() + printed_size); } @@ -896,15 +905,16 @@ impl DisplaySink for BigEnoughString { unsafe { let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let mut rem = new_bytes.len() as isize; + // this used to be enough to bamboozle llvm away from + // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 + // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped + // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` + // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this + // unrolls into some kind of appropriate series of `mov`. 
dest.offset(0 as isize).write(new_bytes[0]); for i in 1..new_bytes.len() { - unsafe { - dest.offset(i as isize).write(new_bytes[i]); - } + dest.offset(i as isize).write(new_bytes[i]); } buf.set_len(buf.len() + new_bytes.len()); @@ -930,7 +940,7 @@ impl DisplaySink for BigEnoughString { let dest = buf.as_mut_ptr().offset(buf.len() as isize); let src = new_bytes.as_ptr(); - let mut rem = new_bytes.len() as isize; + let rem = new_bytes.len() as isize; // set_len early because there is no way to avoid the following asm!() writing that // same number of bytes into buf @@ -1008,7 +1018,7 @@ impl DisplaySink for BigEnoughString { let dest = buf.as_mut_ptr().offset(buf.len() as isize); let src = new_bytes.as_ptr(); - let mut rem = new_bytes.len() as isize; + let rem = new_bytes.len() as isize; // set_len early because there is no way to avoid the following asm!() writing that // same number of bytes into buf @@ -1077,7 +1087,7 @@ impl DisplaySink for BigEnoughString { let dest = buf.as_mut_ptr().offset(buf.len() as isize); let src = new_bytes.as_ptr(); - let mut rem = new_bytes.len() as isize; + let rem = new_bytes.len() as isize; // set_len early because there is no way to avoid the following asm!() writing that // same number of bytes into buf @@ -1132,21 +1142,23 @@ impl DisplaySink for BigEnoughString { // means we can write directly into the correct offsets of the output string. 
let printed_size = (((8 - v.leading_zeros()) >> 2) + 1) as usize; - unsafe { - let buf = unsafe { self.content.as_mut_vec() }; - let p = buf.as_mut_ptr(); - let mut curr = printed_size; - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; + let buf = unsafe { self.content.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + unsafe { p.offset(curr as isize).write(c); - v = v / 16; - if v == 0 { - break; - } } + v = v / 16; + if v == 0 { + break; + } + } + unsafe { buf.set_len(buf.len() + printed_size); } @@ -1162,21 +1174,23 @@ impl DisplaySink for BigEnoughString { // means we can write directly into the correct offsets of the output string. let printed_size = (((16 - v.leading_zeros()) >> 2) + 1) as usize; - unsafe { - let buf = unsafe { self.content.as_mut_vec() }; - let p = buf.as_mut_ptr(); - let mut curr = printed_size; - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; + let buf = unsafe { self.content.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + unsafe { p.offset(curr as isize).write(c); - v = v / 16; - if v == 0 { - break; - } } + v = v / 16; + if v == 0 { + break; + } + } + unsafe { buf.set_len(buf.len() + printed_size); } @@ -1193,21 +1207,23 @@ impl DisplaySink for BigEnoughString { // means we can write directly into the correct offsets of the output string. 
let printed_size = (((32 - v.leading_zeros()) >> 2) + 1) as usize; - unsafe { - let buf = unsafe { self.content.as_mut_vec() }; - let p = buf.as_mut_ptr(); - let mut curr = printed_size; - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; + let buf = unsafe { self.content.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + unsafe { p.offset(curr as isize).write(c); - v = v / 16; - if v == 0 { - break; - } } + v = v / 16; + if v == 0 { + break; + } + } + unsafe { buf.set_len(buf.len() + printed_size); } @@ -1224,24 +1240,24 @@ impl DisplaySink for BigEnoughString { // means we can write directly into the correct offsets of the output string. let printed_size = (((64 - v.leading_zeros()) >> 2) + 1) as usize; - unsafe { - let buf = unsafe { self.content.as_mut_vec() }; - let p = buf.as_mut_ptr(); - let mut curr = printed_size; - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - curr -= 1; + let buf = unsafe { self.content.as_mut_vec() }; + let p = buf.as_mut_ptr(); + let mut curr = printed_size; + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + curr -= 1; + unsafe { p.offset(curr as isize).write(c); - v = v / 16; - if v == 0 { - break; - } } - - buf.set_len(buf.len() + printed_size); + v = v / 16; + if v == 0 { + break; + } } + unsafe { buf.set_len(buf.len() + printed_size); } + Ok(()) } fn span_enter(&mut self, ty: TokenType) {} @@ -1547,7 +1563,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } else { self.f.write_fixed_size("+ 0x")?; } - self.f.write_u32(v); + self.f.write_u32(v)?; } self.f.write_char(']') } @@ -5200,7 +5216,7 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual let mut out = NoColorsSink { out, }; - let mut out = &mut out; + let out = &mut out; contextualize_intel(instr, out) } @@ -5217,7 +5233,7 @@ impl ShowContextual Date: Wed, 19 Jun 2024 11:25:51 -0700 Subject: 
[PATCH 49/95] visit_disp is only outlined for bad reasons --- src/long_mode/display.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 8138ab1..686285e 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1469,6 +1469,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.write_fixed_size("]")?; Ok(()) } + #[inline(always)] fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; if self.op_nr >= 4 { From 8b1677e14e59c8ed5c61e200863a19576034d62e Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 19 Jun 2024 11:53:25 -0700 Subject: [PATCH 50/95] dedup mem size prefix printing --- src/long_mode/display.rs | 173 +++++++++++---------------------------- 1 file changed, 46 insertions(+), 127 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 686285e..75a85bb 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1293,7 +1293,6 @@ impl BigEnoughString { struct ColorizingOperandVisitor<'a, T> { instr: &'a Instruction, - op_nr: u8, f: &'a mut T, } @@ -1434,17 +1433,6 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi Ok(()) } fn visit_abs_u32(&mut self, imm: u32) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; - self.f.write_fixed_size(" ")?; - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - let name = prefix.name(); - self.f.write_char(name[0] as char)?; - self.f.write_char(name[1] as char)?; - self.f.write_char(':')?; - } self.f.write_fixed_size("[")?; self.f.write_fixed_size("0x")?; self.f.write_u32(imm)?; @@ -1452,38 +1440,16 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi Ok(()) } fn visit_abs_u64(&mut self, imm: u64) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; - self.f.write_fixed_size(" ")?; - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - let name = prefix.name(); - self.f.write_char(name[0] as char)?; - self.f.write_char(name[1] as char)?; - self.f.write_char(':')?; - } self.f.write_fixed_size("[")?; self.f.write_fixed_size("0x")?; self.f.write_u64(imm)?; self.f.write_fixed_size("]")?; Ok(()) } - #[inline(always)] + #[cfg_attr(not(feature="profiling"), inline(always))] + #[cfg_attr(feature="profiling", inline(never))] fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - let name = prefix.name(); - self.f.write_char(' ')?; - self.f.write_char(name[0] as char)?; - self.f.write_char(name[1] as char)?; - self.f.write_fixed_size(":[")?; - } else { - self.f.write_fixed_size(" [")?; - } + self.f.write_char('[')?; unsafe { self.f.write_lt_8(regspec_label(®))?; } self.f.write_fixed_size(" ")?; @@ -1500,36 +1466,12 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.write_fixed_size("]") } fn visit_deref(&mut self, reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - let name = prefix.name(); - self.f.write_char(' ')?; - self.f.write_char(name[0] as char)?; - self.f.write_char(name[1] as char)?; - self.f.write_fixed_size(":[")?; - } else { - self.f.write_fixed_size(" [")?; - } + self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(®))?; } self.f.write_fixed_size("]") } fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - let name = prefix.name(); - self.f.write_char(' ')?; - self.f.write_char(name[0] as char)?; - self.f.write_char(name[1] as char)?; - self.f.write_fixed_size(":[")?; - } else { - self.f.write_fixed_size(" [")?; - } + self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(®))?; } self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc @@ -1538,19 +1480,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi Ok(()) } fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - let name = prefix.name(); - self.f.write_char(' ')?; - self.f.write_char(name[0] as char)?; - self.f.write_char(name[1] as char)?; - self.f.write_fixed_size(":[")?; - } else { - self.f.write_fixed_size(" [")?; - } + self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(®))?; } self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc @@ -1569,19 +1499,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.write_char(']') } fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - let name = prefix.name(); - self.f.write_char(' ')?; - self.f.write_char(name[0] as char)?; - self.f.write_char(name[1] as char)?; - self.f.write_fixed_size(":[")?; - } else { - self.f.write_fixed_size(" [")?; - } + self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&base))?; } self.f.write_fixed_size(" + ")?; unsafe { self.f.write_lt_8(regspec_label(&index))?; } @@ -1590,19 +1508,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.write_fixed_size("]") } fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; - if self.op_nr >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } - if let Some(prefix) = self.instr.segment_override_for_op(self.op_nr) { - let name = prefix.name(); - self.f.write_char(' ')?; - self.f.write_char(name[0] as char)?; - self.f.write_char(name[1] as char)?; - self.f.write_fixed_size(":[")?; - } else { - self.f.write_fixed_size(" [")?; - } + self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&base))?; } self.f.write_fixed_size(" + ")?; unsafe { self.f.write_lt_8(regspec_label(&index))?; } @@ -1623,8 +1529,6 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.write_fixed_size("]") } fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; - self.f.write_fixed_size(" ")?; self.f.write_char('[')?; unsafe { self.f.write_lt_8(regspec_label(&spec))?; } self.f.write_char(' ')?; @@ -1640,16 +1544,12 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; - self.f.write_fixed_size(" ")?; self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&spec))?; } self.f.write_fixed_size("]")?; write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; - self.f.write_fixed_size(" ")?; write!(self.f, "[{} * {}]", regspec_label(&spec), scale @@ -1657,8 +1557,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; - self.f.write_fixed_size(" [")?; + self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&spec))?; } self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc @@ -1675,8 +1574,6 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; - self.f.write_fixed_size(" ")?; self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&base))?; } self.f.write_fixed_size(" + ")?; @@ -1685,8 +1582,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; - self.f.write_fixed_size(" [")?; + self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&base))?; } self.f.write_fixed_size(" + ")?; unsafe { self.f.write_lt_8(regspec_label(&index))?; } @@ -1703,8 +1599,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? }; - self.f.write_fixed_size(" [")?; + self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&base))?; } self.f.write_fixed_size(" + ")?; unsafe { self.f.write_lt_8(regspec_label(&index))?; } @@ -1714,8 +1609,6 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi write!(self.f, "{{{}}}", regspec_label(&mask_reg)) } fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { - unsafe { self.f.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(self.instr.mem_size as usize))? 
}; - self.f.write_fixed_size(" ")?; write!(self.f, "[{} + {} * {} ", regspec_label(&base), regspec_label(&index), @@ -4789,9 +4682,21 @@ fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt: return Ok(()); } + if instr.operands[0 as usize].is_memory() { + unsafe { out.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(instr.mem_size as usize))? }; + if let Some(prefix) = instr.segment_override_for_op(0) { + let name = prefix.name(); + out.write_char(' ')?; + out.write_char(name[0] as char)?; + out.write_char(name[1] as char)?; + out.write_fixed_size(":")?; + } else { + out.write_fixed_size(" ")?; + } + } + let mut displayer = ColorizingOperandVisitor { instr, - op_nr: 0, f: out, }; instr.visit_operand(0 as u8, &mut displayer)?; @@ -4800,18 +4705,34 @@ fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt: // don't worry about checking for `instr.operands[i] != Nothing`, it would be a bug to // reach that while iterating only to `operand_count`.. out.write_fixed_size(", ")?; + if i >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } + + if instr.operands[i as usize].is_memory() { + unsafe { out.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(instr.mem_size as usize))? 
}; + if i >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } + if let Some(prefix) = instr.segment_override_for_op(i) { + let name = prefix.name(); + out.write_char(' ')?; + out.write_char(name[0] as char)?; + out.write_char(name[1] as char)?; + out.write_fixed_size(":")?; + } else { + out.write_fixed_size(" ")?; + } + } + let mut displayer = ColorizingOperandVisitor { instr, - op_nr: i, f: out, }; - if i >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } + instr.visit_operand(i as u8, &mut displayer)?; if let Some(evex) = instr.prefixes.evex() { - let x = Operand::from_spec(instr, instr.operands[i as usize]); - if evex.broadcast() && x.is_memory() { + if evex.broadcast() && instr.operands[i as usize].is_memory() { let scale = if instr.opcode == Opcode::VCVTPD2PS || instr.opcode == Opcode::VCVTTPD2UDQ || instr.opcode == Opcode::VCVTPD2UDQ || instr.opcode == Opcode::VCVTUDQ2PD || instr.opcode == Opcode::VCVTPS2PD || instr.opcode == Opcode::VCVTQQ2PS || instr.opcode == Opcode::VCVTDQ2PD || instr.opcode == Opcode::VCVTTPD2DQ || instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VFPCLASSPD || instr.opcode == Opcode::VCVTNEPS2BF16 || instr.opcode == Opcode::VCVTUQQ2PS || instr.opcode == Opcode::VCVTPD2DQ || instr.opcode == Opcode::VCVTTPS2UQQ || instr.opcode == Opcode::VCVTPS2UQQ || instr.opcode == Opcode::VCVTTPS2QQ || instr.opcode == Opcode::VCVTPS2QQ { if instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VCVTNEPS2BF16 { if evex.vex().l() { @@ -5271,7 +5192,6 @@ impl ShowContextual ShowContextual Date: Wed, 19 Jun 2024 12:26:40 -0700 Subject: [PATCH 51/95] no more fmt in display code, remove more dead struct fields --- src/long_mode/display.rs | 66 ++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 75a85bb..535fc75 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1292,7 +1292,6 @@ impl 
BigEnoughString { } struct ColorizingOperandVisitor<'a, T> { - instr: &'a Instruction, f: &'a mut T, } @@ -1541,20 +1540,30 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } self.f.write_u32(v)?; self.f.write_char(']')?; - write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + self.f.write_char('{')?; + unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_char('}')?; + Ok(()) } fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; unsafe { self.f.write_lt_8(regspec_label(&spec))?; } self.f.write_fixed_size("]")?; - write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + self.f.write_char('{')?; + unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_char('}')?; + Ok(()) } fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { - write!(self.f, "[{} * {}]", - regspec_label(&spec), - scale - )?; - write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + self.f.write_fixed_size("[")?; + unsafe { self.f.write_lt_8(regspec_label(&spec))?; } + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size("]")?; + self.f.write_char('{')?; + unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_char('}')?; + Ok(()) } fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; @@ -1571,7 +1580,10 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } self.f.write_u32(v)?; self.f.write_char(']')?; - write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + self.f.write_char('{')?; + unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_char('}')?; + Ok(()) } fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; @@ -1579,7 +1591,10 @@ impl 
crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.write_fixed_size(" + ")?; unsafe { self.f.write_lt_8(regspec_label(&index))?; } self.f.write_fixed_size("]")?; - write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + self.f.write_char('{')?; + unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_char('}')?; + Ok(()) } fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; @@ -1596,7 +1611,10 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } self.f.write_u32(v)?; self.f.write_char(']')?; - write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + self.f.write_char('{')?; + unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_char('}')?; + Ok(()) } fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; @@ -1606,14 +1624,19 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]")?; - write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + self.f.write_char('{')?; + unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_char('}')?; + Ok(()) } fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { - write!(self.f, "[{} + {} * {} ", - regspec_label(&base), - regspec_label(&index), - scale, - )?; + self.f.write_fixed_size("[")?; + unsafe { self.f.write_lt_8(regspec_label(&base))?; } + self.f.write_fixed_size(" + ")?; + unsafe { self.f.write_lt_8(regspec_label(&index))?; } + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_char(' ')?; let mut v = disp as u32; if disp < 0 { 
self.f.write_fixed_size("- 0x")?; @@ -1623,7 +1646,10 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } self.f.write_u32(v)?; self.f.write_char(']')?; - write!(self.f, "{{{}}}", regspec_label(&mask_reg)) + self.f.write_char('{')?; + unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_char('}')?; + Ok(()) } fn visit_other(&mut self) -> Result { @@ -4696,7 +4722,6 @@ fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt: } let mut displayer = ColorizingOperandVisitor { - instr, f: out, }; instr.visit_operand(0 as u8, &mut displayer)?; @@ -4726,7 +4751,6 @@ fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt: } let mut displayer = ColorizingOperandVisitor { - instr, f: out, }; @@ -5191,7 +5215,6 @@ impl ShowContextual ShowContextual { write!(out, ", ")?; let mut displayer = ColorizingOperandVisitor { - instr: self, f: out, }; self.visit_operand(i as u8, &mut displayer)?; From 8221d7cdbdd59afb200f5b86735d8696dbeb8fc8 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 19 Jun 2024 14:28:49 -0700 Subject: [PATCH 52/95] more warning cleanup --- src/long_mode/display.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 535fc75..da138bb 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -885,8 +885,8 @@ impl DisplaySink for alloc::string::String { Ok(()) } - fn span_enter(&mut self, ty: TokenType) {} - fn span_end(&mut self, ty: TokenType) {} + fn span_enter(&mut self, _ty: TokenType) {} + fn span_end(&mut self, _ty: TokenType) {} } impl DisplaySink for BigEnoughString { @@ -1260,8 +1260,8 @@ impl DisplaySink for BigEnoughString { Ok(()) } - fn span_enter(&mut self, ty: TokenType) {} - fn span_end(&mut self, ty: TokenType) {} + fn span_enter(&mut self, _ty: TokenType) {} + fn span_end(&mut self, _ty: TokenType) {} } impl BigEnoughString { @@ -4709,7 +4709,7 @@ fn contextualize_intel(instr: &Instruction, out: 
&mut T) -> fmt: } if instr.operands[0 as usize].is_memory() { - unsafe { out.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(instr.mem_size as usize))? }; + unsafe { out.write_lt_8(mem_size_label(instr.mem_size))? }; if let Some(prefix) = instr.segment_override_for_op(0) { let name = prefix.name(); out.write_char(' ')?; @@ -4735,7 +4735,7 @@ fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt: } if instr.operands[i as usize].is_memory() { - unsafe { out.write_lt_8(MEM_SIZE_STRINGS.get_kinda_unchecked(instr.mem_size as usize))? }; + unsafe { out.write_lt_8(mem_size_label(instr.mem_size))? }; if i >= 4 { unsafe { core::hint::unreachable_unchecked(); } } @@ -5151,7 +5151,7 @@ fn contextualize_c(instr: &Instruction, _address: u64, _context: } impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual for InstructionDisplayer<'instr> { - fn contextualize(&self, colors: &Y, address: u64, context: Option<&NoContext>, out: &mut T) -> fmt::Result { + fn contextualize(&self, _colors: &Y, address: u64, context: Option<&NoContext>, out: &mut T) -> fmt::Result { let InstructionDisplayer { instr, style, From 55a64ffa7fddc9d8372e4072c554a676e2f0fc17 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 19 Jun 2024 14:29:18 -0700 Subject: [PATCH 53/95] better testing for alternate sinks, fix hex formatting bug.... --- src/long_mode/display.rs | 60 +++++++++++++++++++++++++++++----------- test/long_mode/mod.rs | 45 ++++++++++++++++++++++++++++-- 2 files changed, 87 insertions(+), 18 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index da138bb..198a930 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -758,11 +758,15 @@ impl DisplaySink for alloc::string::String { fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
- let printed_size = (((8 - v.leading_zeros()) >> 2) + 1) as usize; + let mut printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + if printed_size == 0 { + printed_size = 1; + } + self.reserve(printed_size); let buf = unsafe { self.as_mut_vec() }; - let p = buf.as_mut_ptr(); + let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; let mut curr = printed_size; loop { let digit = v % 16; @@ -791,11 +795,15 @@ impl DisplaySink for alloc::string::String { fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. - let printed_size = (((16 - v.leading_zeros()) >> 2) + 1) as usize; + let mut printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + if printed_size == 0 { + printed_size = 1; + } + self.reserve(printed_size); let buf = unsafe { self.as_mut_vec() }; - let p = buf.as_mut_ptr(); + let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; let mut curr = printed_size; loop { let digit = v % 16; @@ -826,11 +834,15 @@ impl DisplaySink for alloc::string::String { fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
- let printed_size = (((32 - v.leading_zeros()) >> 2) + 1) as usize; + let mut printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + if printed_size == 0 { + printed_size = 1; + } + self.reserve(printed_size); let buf = unsafe { self.as_mut_vec() }; - let p = buf.as_mut_ptr(); + let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; let mut curr = printed_size; loop { let digit = v % 16; @@ -860,11 +872,15 @@ impl DisplaySink for alloc::string::String { fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. - let printed_size = (((64 - v.leading_zeros()) >> 2) + 1) as usize; + let mut printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + if printed_size == 0 { + printed_size = 1; + } + self.reserve(printed_size); let buf = unsafe { self.as_mut_vec() }; - let p = buf.as_mut_ptr(); + let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; let mut curr = printed_size; loop { let digit = v % 16; @@ -1140,10 +1156,13 @@ impl DisplaySink for BigEnoughString { fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
- let printed_size = (((8 - v.leading_zeros()) >> 2) + 1) as usize; + let mut printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + if printed_size == 0 { + printed_size = 1; + } let buf = unsafe { self.content.as_mut_vec() }; - let p = buf.as_mut_ptr(); + let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; let mut curr = printed_size; loop { let digit = v % 16; @@ -1172,10 +1191,13 @@ impl DisplaySink for BigEnoughString { fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. - let printed_size = (((16 - v.leading_zeros()) >> 2) + 1) as usize; + let mut printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + if printed_size == 0 { + printed_size = 1; + } let buf = unsafe { self.content.as_mut_vec() }; - let p = buf.as_mut_ptr(); + let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; let mut curr = printed_size; loop { let digit = v % 16; @@ -1205,10 +1227,13 @@ impl DisplaySink for BigEnoughString { fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
- let printed_size = (((32 - v.leading_zeros()) >> 2) + 1) as usize; + let mut printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + if printed_size == 0 { + printed_size = 1; + } let buf = unsafe { self.content.as_mut_vec() }; - let p = buf.as_mut_ptr(); + let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; let mut curr = printed_size; loop { let digit = v % 16; @@ -1238,10 +1263,13 @@ impl DisplaySink for BigEnoughString { fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. - let printed_size = (((64 - v.leading_zeros()) >> 2) + 1) as usize; + let mut printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + if printed_size == 0 { + printed_size = 1; + } let buf = unsafe { self.content.as_mut_vec() }; - let p = buf.as_mut_ptr(); + let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; let mut curr = printed_size; loop { let digit = v % 16; diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index 8b01461..24df133 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -15,6 +15,19 @@ use std::fmt::Write; use yaxpeax_arch::{AddressBase, Decoder, LengthedInstruction}; use yaxpeax_x86::long_mode::InstDecoder; +#[test] +#[cfg(feature="std")] +fn test_write_hex_specialization() { + use crate::yaxpeax_x86::long_mode::DisplaySink; + for i in 0..0xffu8 { + let mut out = yaxpeax_x86::long_mode::BigEnoughString::new(); + out.write_char('0').unwrap(); + out.write_char('x').unwrap(); + out.write_u8(i).unwrap(); + assert_eq!(out.into_inner(), format!("0x{:x}", i)); + } +} + fn test_invalid(data: &[u8]) { test_invalid_under(&InstDecoder::default(), data); } @@ -62,23 +75,51 @@ fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str text, expected ); + let mut text2 = yaxpeax_x86::long_mode::BigEnoughString::new(); let 
mut out = yaxpeax_x86::long_mode::NoColorsSink { out: &mut text2, }; - instr.write_to(&mut out); + instr.write_to(&mut out).expect("printing succeeds"); core::mem::drop(out); let text2 = text2.into_inner(); assert!( text2 == text, - "display error for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", + "display error through NoColorsSink for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", hex, instr, decoder, text2, text, ); + + let mut text3 = yaxpeax_x86::long_mode::BigEnoughString::new(); + instr.write_to(&mut text3).expect("printing succeeds"); + let text3 = text3.into_inner(); + + assert!( + text3 == text, + "display error through BigEnoughString for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", + hex, + instr, + decoder, + text3, + text, + ); + + let mut text4 = String::new(); + instr.write_to(&mut text4).expect("printing succeeds"); + + assert!( + text4 == text, + "display error through String for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", + hex, + instr, + decoder, + text4, + text, + ); } else { eprintln!("non-fmt build cannot compare text equality") } From 89b8aeef32518a0fece8c1283fe83da369113afd Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 20 Jun 2024 12:40:53 -0700 Subject: [PATCH 54/95] starting to get new DisplaySink stuff ready to extract... 
--- src/long_mode/display.rs | 305 ++++++++++++++++++++++++++------------- src/long_mode/mod.rs | 2 +- test/long_mode/mod.rs | 15 +- 3 files changed, 209 insertions(+), 113 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 198a930..45c488e 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -4,6 +4,7 @@ use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; use yaxpeax_arch::display::*; use crate::safer_unchecked::GetSaferUnchecked as _; +use crate::safer_unchecked::unreachable_kinda_unchecked; use crate::MEM_SIZE_STRINGS; use crate::long_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixRex, OperandSpec}; @@ -370,6 +371,40 @@ pub enum TokenType { Offset, } +/// `DisplaySink` allows client code to collect output and minimal markup. this is currently used +/// in formatting instructions for two reasons: +/// * `DisplaySink` implementations have the opportunity to collect starts and ends of tokens at +/// the same time as collecting output itself. +/// * `DisplaySink` implementations provide specialized functions for writing strings in +/// circumstances where a simple "use `core::fmt`" might incur unwanted overhead. +/// +/// spans are reported through `span_start` and `span_end` to avoid constraining implementations +/// into tracking current output offset (which may not be knowable) or span size (which may be +/// knowable, but incur additional overhead to compute or track). +/// +/// spans are entered and exited in a FILO manner: a function writing to some `DisplaySink` must +/// exit spans in reverse order to when they are entered. a function sequence like +/// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_end(Operand)` is in error.
+/// +/// the `write_*` helpers on `DisplaySink` may be able to take advantage of constraints described in +/// documentation here to better support writing some kinds of inputs than a fully-general solution +/// (such as `core::fmt`) might be able to yield. +/// +/// currently there are two motivating factors for `write_*` helpers: +/// +/// instruction formatting often involves writing small but variable-size strings, such as register +/// names, which is something of a pathological case for string appending as Rust currently exists: +/// this often becomes `memcpy` and specifically a call to the platform's `memcpy` (rather than an +/// inlined `rep movsb`) just to move 3-5 bytes. one relevant Rust issue for reference: +/// https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 +/// +/// there are similar papercuts around formatting integers as base-16 numbers, such as +/// https://github.com/rust-lang/rust/pull/122770 . in isolation and in most applications these are +/// not a significant source of overhead. but for programs bounded on decoding and printing +/// instructions, these can add up to significant overhead - on the order of 10-20% of total +/// runtime. +/// +/// `DisplaySink` pub trait DisplaySink: fmt::Write { #[inline(always)] fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { @@ -436,9 +471,30 @@ pub trait DisplaySink: fmt::Write { fn write_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> { write!(self, "{:x}", v) } - // fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error>; - fn span_enter(&mut self, ty: TokenType); - fn span_end(&mut self, ty: TokenType); + /// enter a region inside which output corresponds to a `ty`. + /// + /// the default implementation of these functions is as a no-op. this way, providing span + /// information to a `DisplaySink` that does not want it is eliminated at compile time.
+ /// + /// spans are entered and ended in a FILO manner: a function writing to some `DisplaySink` must + /// end spans in reverse order to when they are entered. a function sequence like + /// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_end(Operand)` is in error. + /// + /// a simple use of `span_start`/`span_end` might look something like: + /// ```compile_fail + /// sink.span_start(Operand) + /// sink.write_char('[') + /// sink.span_start(Register) + /// sink.write_fixed_size("rbp") + /// sink.span_end(Register) + /// sink.write_char(']') + /// sink.span_end(Operand) + /// ``` + /// which writes the text `[rbp]`, with span indicators where the operand (`[ ... ]`) begins, + /// as well as the start and end of a register name. + fn span_start(&mut self, _ty: TokenType) { } + /// end a region where a `ty` was written. see docs on [`DisplaySink::span_start`] for more. + fn span_end(&mut self, _ty: TokenType) { } } pub struct NoColorsSink<'a, T: fmt::Write> { @@ -446,7 +502,7 @@ pub struct NoColorsSink<'a, T: fmt::Write> { } impl<'a, T: fmt::Write> DisplaySink for NoColorsSink<'a, T> { - fn span_enter(&mut self, _ty: TokenType) { } + fn span_start(&mut self, _ty: TokenType) { } fn span_end(&mut self, _ty: TokenType) { } } @@ -462,44 +518,135 @@ impl<'a, T: fmt::Write> fmt::Write for NoColorsSink<'a, T> { } } -/* -impl DisplaySink for T { +/// helper to format `amd64` instructions with highest throughput and least configuration. +/// +/// ### when to use this over `fmt::Display`? +/// +/// `fmt::Display` is a fair choice in most cases. in some cases, `InstructionFormatter` may +/// support formatting options that may be difficult to configure for a `Display` impl. +/// additionally, `InstructionFormatter` may be able to specialize more effectively where +/// `fmt::Display`, writing to a generic `fmt::Write`, may not.
+/// +/// if your use case for `yaxpeax-x86` involves being bounded on the speed of disassembling and +/// formatting instructions, [`InstructionFormatter::format_inst`] has been measured as up to 11% +/// faster than an equivalent `write!(buf, "{}", inst)`. +/// +/// `InstructionFormatter` involves internal allocations; if your use case for `yaxpeax-x86` +/// requires allocations never occurring, it is not an appropriate tool. +/// +/// ### example +/// +/// ``` +/// use yaxpeax_x86::long_mode::InstDecoder; +/// use yaxpeax_x86::long_mode::InstructionFormatter; +/// +/// let bytes = &[0x33, 0xc0]; +/// let inst = InstDecoder::default().decode_slice(bytes).expect("can decode"); +/// let mut formatter = InstructionFormatter::new(); +/// assert_eq!( +/// formatter.format_inst(&inst).expect("can format"), +/// "xor eax, eax" +/// ); +/// +/// // or, getting the formatted instruction with `text_str`: +/// assert_eq!( +/// formatter.text_str(), +/// "xor eax, eax" +/// ); +/// ``` +pub struct InstructionFormatter { + content: alloc::string::String, +} - /* - fn write_str(&mut self) -> Result<(), core::fmt::Error> { - ::write_str(self, s) +impl InstructionFormatter { + /// create an `InstructionFormatter` with default settings. `InstructionFormatter`'s default + /// settings format instructions identically to their corresponding `fmt::Display`. + pub fn new() -> Self { + let mut buf = alloc::string::String::new(); + // TODO: move 512 out to a MAX_INSTRUCTION_LEN const and appropriate justification (and + // fuzzing and ..) + buf.reserve(512); + Self { + content: buf, + } } - fn write_char(&mut self) -> Result<(), core::fmt::Error> { - ::write_char(self, c) + + /// format `inst` through this formatter, storing the formatted text in this formatter's + /// internal buffer. returns a borrow of that same internal buffer for convenience. 
+ /// + /// this clears and reuses an internal buffer; if an instruction had been previously formatted + /// through this formatter, it will be overwritten. + pub fn format_inst<'formatter>(&'formatter mut self, inst: &Instruction) -> Result<&'formatter str, fmt::Error> { + let mut handle = self.write_handle(); + + inst.write_to(&mut handle)?; + + Ok(self.text_str()) + } + + /// return a borrow of this formatter's buffer. if an instruction has been formatted, the + /// returned `&str` contains that formatted instruction's text. + pub fn text_str(&self) -> &str { + self.content.as_str() + } + + fn write_handle(&mut self) -> InstructionTextSink { + self.content.clear(); + InstructionTextSink { + buf: &mut self.content + } } - */ - fn span_enter(&mut self, _ty: TokenType) { } - fn span_end(&mut self, _ty: TokenType) { } } -*/ -pub struct BigEnoughString { - content: alloc::string::String, +/// this private struct is guaranteed to contain a string that is long enough to hold a +/// fully-formatted x86 instruction. +/// +/// this is wildly dangerous in general use, but because of the constrained lifecycle of +/// `InstructionTextSink` in the context of `InstructionFormatter`, it's OK to use here. it is +/// wildly dangerous because writing to this sink does not bounds check and assumes the contained +/// `buf` is large enough for any input. as an example: if `buf` did not have enough space +/// available from its current position, the `write_*` methods would write into whatever happens to +/// be after `buf` in memory. +/// +/// don't make this pub. if this is pub in docs, it's a bug. 
+struct InstructionTextSink<'buf> { + buf: &'buf mut alloc::string::String } -// TODO: move this to an impl on a handle from BigEnoughString obtained through an `unsafe fn` that -// clearly states requirements -impl fmt::Write for BigEnoughString { +impl<'buf> fmt::Write for InstructionTextSink<'buf> { fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.content.write_str(s) + self.buf.write_str(s) } fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { - // SAFETY: TODO: goodness, what + // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()` + // is valid for writing, but may be uninitialized. + // + // this function is essentially equivalent to `Vec::push` specialized for the case that + // `len < buf.capacity()`: + // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006 unsafe { - let underlying = self.content.as_mut_vec(); - underlying.as_mut_ptr().offset(underlying.len() as isize).write(c as u8); + let underlying = self.buf.as_mut_vec(); + // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to + // write single ASCII characters. this is wrong in the general case, but `write_char` + // here is not going to be used in the general case. + if cfg!(debug_asertions) { + panic!("InstructionTextSink::write_char would truncate output"); + } + let to_push = c as u8; + // `ptr::write` here because `underlying.add(underlying.len())` may not point to an + // initialized value, which would mean that turning that pointer into a `&mut u8` to + // store through would be UB. `ptr::write` avoids taking the mut ref. + underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push); + // we have initialized all (one) bytes that `set_len` is increasing the length to + // include. underlying.set_len(underlying.len() + 1); } Ok(()) } } -// TODO: delete this whole thing? maybe? 
+/// this DisplaySink impl exists to support somewhat more performant buffering of the kinds of +/// strings `yaxpeax-x86` uses in formatting instructions. impl DisplaySink for alloc::string::String { #[inline(always)] fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { @@ -508,18 +655,18 @@ impl DisplaySink for alloc::string::String { let new_bytes = s.as_bytes(); if new_bytes.len() == 0 { - unsafe { core::hint::unreachable_unchecked() } + unsafe { unreachable_kinda_unchecked() } } if new_bytes.len() >= 16 { - unsafe { core::hint::unreachable_unchecked() } + unsafe { unreachable_kinda_unchecked() } } unsafe { let dest = buf.as_mut_ptr().offset(buf.len() as isize); // this used to be enough to bamboozle llvm away from - // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 + // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` // (otherwise this is a libc memcpy call in disguise). 
for fixed-size strings this @@ -545,10 +692,6 @@ impl DisplaySink for alloc::string::String { if new_bytes.len() >= 32 { unsafe { core::hint::unreachable_unchecked() } } - // should get DCE - if new_bytes.len() == 0 { - unsafe { core::hint::unreachable_unchecked() } - } unsafe { let dest = buf.as_mut_ptr().offset(buf.len() as isize); @@ -625,10 +768,6 @@ impl DisplaySink for alloc::string::String { if new_bytes.len() >= 16 { unsafe { core::hint::unreachable_unchecked() } } - // should get DCE - if new_bytes.len() == 0 { - unsafe { core::hint::unreachable_unchecked() } - } unsafe { let dest = buf.as_mut_ptr().offset(buf.len() as isize); @@ -696,10 +835,6 @@ impl DisplaySink for alloc::string::String { if new_bytes.len() >= 8 { unsafe { core::hint::unreachable_unchecked() } } - // should get DCE - if new_bytes.len() == 0 { - unsafe { core::hint::unreachable_unchecked() } - } unsafe { let dest = buf.as_mut_ptr().offset(buf.len() as isize); @@ -792,6 +927,7 @@ impl DisplaySink for alloc::string::String { /// this is provided for optimization opportunities when the formatted integer can be written /// directly to the sink (rather than formatted to an intermediate buffer and output as a /// followup step) + #[inline(always)] fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. @@ -823,7 +959,6 @@ impl DisplaySink for alloc::string::String { } Ok(()) - } /// write a u32 to the output as a base-16 integer. 
/// @@ -901,22 +1036,22 @@ impl DisplaySink for alloc::string::String { Ok(()) } - fn span_enter(&mut self, _ty: TokenType) {} + fn span_start(&mut self, _ty: TokenType) {} fn span_end(&mut self, _ty: TokenType) {} } -impl DisplaySink for BigEnoughString { +impl<'buf> DisplaySink for InstructionTextSink<'buf> { #[inline(always)] fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { - let buf = unsafe { self.content.as_mut_vec() }; + let buf = unsafe { self.buf.as_mut_vec() }; let new_bytes = s.as_bytes(); if new_bytes.len() == 0 { - unsafe { core::hint::unreachable_unchecked() } + unsafe { unreachable_kinda_unchecked() } } if new_bytes.len() >= 16 { - unsafe { core::hint::unreachable_unchecked() } + unsafe { unreachable_kinda_unchecked() } } unsafe { @@ -940,17 +1075,13 @@ impl DisplaySink for BigEnoughString { } unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { // SAFETY: todo - let buf = unsafe { self.content.as_mut_vec() }; + let buf = unsafe { self.buf.as_mut_vec() }; let new_bytes = s.as_bytes(); // should get DCE if new_bytes.len() >= 32 { unsafe { core::hint::unreachable_unchecked() } } - // should get DCE - if new_bytes.len() == 0 { - unsafe { core::hint::unreachable_unchecked() } - } unsafe { let dest = buf.as_mut_ptr().offset(buf.len() as isize); @@ -1018,17 +1149,13 @@ impl DisplaySink for BigEnoughString { } unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { // SAFETY: todo - let buf = unsafe { self.content.as_mut_vec() }; + let buf = unsafe { self.buf.as_mut_vec() }; let new_bytes = s.as_bytes(); // should get DCE if new_bytes.len() >= 16 { unsafe { core::hint::unreachable_unchecked() } } - // should get DCE - if new_bytes.len() == 0 { - unsafe { core::hint::unreachable_unchecked() } - } unsafe { let dest = buf.as_mut_ptr().offset(buf.len() as isize); @@ -1087,17 +1214,13 @@ impl DisplaySink for BigEnoughString { } unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { // SAFETY: 
todo - let buf = unsafe { self.content.as_mut_vec() }; + let buf = unsafe { self.buf.as_mut_vec() }; let new_bytes = s.as_bytes(); // should get DCE if new_bytes.len() >= 8 { unsafe { core::hint::unreachable_unchecked() } } - // should get DCE - if new_bytes.len() == 0 { - unsafe { core::hint::unreachable_unchecked() } - } unsafe { let dest = buf.as_mut_ptr().offset(buf.len() as isize); @@ -1161,7 +1284,7 @@ impl DisplaySink for BigEnoughString { printed_size = 1; } - let buf = unsafe { self.content.as_mut_vec() }; + let buf = unsafe { self.buf.as_mut_vec() }; let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; let mut curr = printed_size; loop { @@ -1188,6 +1311,7 @@ impl DisplaySink for BigEnoughString { /// this is provided for optimization opportunities when the formatted integer can be written /// directly to the sink (rather than formatted to an intermediate buffer and output as a /// followup step) + #[inline(always)] fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
@@ -1196,7 +1320,7 @@ impl DisplaySink for BigEnoughString { printed_size = 1; } - let buf = unsafe { self.content.as_mut_vec() }; + let buf = unsafe { self.buf.as_mut_vec() }; let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; let mut curr = printed_size; loop { @@ -1232,7 +1356,7 @@ impl DisplaySink for BigEnoughString { printed_size = 1; } - let buf = unsafe { self.content.as_mut_vec() }; + let buf = unsafe { self.buf.as_mut_vec() }; let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; let mut curr = printed_size; loop { @@ -1268,7 +1392,7 @@ impl DisplaySink for BigEnoughString { printed_size = 1; } - let buf = unsafe { self.content.as_mut_vec() }; + let buf = unsafe { self.buf.as_mut_vec() }; let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; let mut curr = printed_size; loop { @@ -1288,37 +1412,10 @@ impl DisplaySink for BigEnoughString { Ok(()) } - fn span_enter(&mut self, _ty: TokenType) {} + fn span_start(&mut self, _ty: TokenType) {} fn span_end(&mut self, _ty: TokenType) {} } -impl BigEnoughString { - pub fn clear(&mut self) { - self.content.clear(); - } - - pub fn into_inner(self) -> alloc::string::String { - self.content - } - - pub fn from_string(mut s: alloc::string::String) -> Self { - s.reserve(256); - // safety: the string is large enough - unsafe { Self::from_string_unchecked(s) } - } - - pub fn new() -> Self { - Self::from_string(alloc::string::String::new()) - } - - /// safety: CALLER MUST ENSURE S IS LARGE ENOUGH TO HOLD ANY DISASSEMBLED x86 INSTRUCTION - unsafe fn from_string_unchecked(s: alloc::string::String) -> Self { - Self { - content: s - } - } -} - struct ColorizingOperandVisitor<'a, T> { f: &'a mut T, } @@ -1329,7 +1426,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi #[cfg_attr(feature="profiling", inline(never))] fn visit_u8(&mut self, imm: u8) -> Result { - self.f.span_enter(TokenType::Immediate); + self.f.span_start(TokenType::Immediate); self.f.write_fixed_size("0x")?; 
self.f.write_u8(imm)?; self.f.span_end(TokenType::Immediate); @@ -1337,7 +1434,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } #[cfg_attr(feature="profiling", inline(never))] fn visit_i8(&mut self, imm: i8) -> Result { - self.f.span_enter(TokenType::Immediate); + self.f.span_start(TokenType::Immediate); let mut v = imm as u8; if imm < 0 { self.f.write_char('-')?; @@ -1350,7 +1447,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } #[cfg_attr(feature="profiling", inline(never))] fn visit_u16(&mut self, imm: u16) -> Result { - self.f.span_enter(TokenType::Immediate); + self.f.span_start(TokenType::Immediate); self.f.write_fixed_size("0x")?; self.f.write_u16(imm)?; self.f.span_end(TokenType::Immediate); @@ -1358,7 +1455,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } #[cfg_attr(feature="profiling", inline(never))] fn visit_i16(&mut self, imm: i16) -> Result { - self.f.span_enter(TokenType::Immediate); + self.f.span_start(TokenType::Immediate); let mut v = imm as u16; if imm < 0 { self.f.write_char('-')?; @@ -1371,14 +1468,14 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } #[cfg_attr(feature="profiling", inline(never))] fn visit_u32(&mut self, imm: u32) -> Result { - self.f.span_enter(TokenType::Immediate); + self.f.span_start(TokenType::Immediate); self.f.write_fixed_size("0x")?; self.f.write_u32(imm)?; self.f.span_end(TokenType::Immediate); Ok(()) } fn visit_i32(&mut self, imm: i32) -> Result { - self.f.span_enter(TokenType::Immediate); + self.f.span_start(TokenType::Immediate); let mut v = imm as u32; if imm < 0 { self.f.write_char('-')?; @@ -1391,7 +1488,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } #[cfg_attr(feature="profiling", inline(never))] fn visit_u64(&mut self, imm: u64) -> Result { - self.f.span_enter(TokenType::Immediate); + self.f.span_start(TokenType::Immediate); self.f.write_fixed_size("0x")?; self.f.write_u64(imm)?; 
self.f.span_end(TokenType::Immediate); @@ -1399,7 +1496,7 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } #[cfg_attr(feature="profiling", inline(never))] fn visit_i64(&mut self, imm: i64) -> Result { - self.f.span_enter(TokenType::Immediate); + self.f.span_start(TokenType::Immediate); let mut v = imm as u64; if imm < 0 { self.f.write_char('-')?; @@ -1412,18 +1509,18 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } #[cfg_attr(feature="profiling", inline(never))] fn visit_reg(&mut self, reg: RegSpec) -> Result { - self.f.span_enter(TokenType::Register); + self.f.span_start(TokenType::Register); unsafe { self.f.write_lt_8(regspec_label(®))?; } self.f.span_end(TokenType::Register); Ok(()) } fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { - self.f.span_enter(TokenType::Register); + self.f.span_start(TokenType::Register); unsafe { self.f.write_lt_8(regspec_label(&spec))?; } self.f.span_end(TokenType::Register); if mask.num != 0 { self.f.write_fixed_size("{")?; - self.f.span_enter(TokenType::Register); + self.f.span_start(TokenType::Register); unsafe { self.f.write_lt_8(regspec_label(&mask))?; } self.f.span_end(TokenType::Register); self.f.write_fixed_size("}")?; diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 5a5b89a..44ed89f 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -9,7 +9,7 @@ pub use crate::MemoryAccessSize; #[cfg(feature = "fmt")] pub use self::display::{DisplayStyle, InstructionDisplayer}; #[cfg(feature = "fmt")] -pub use self::display::{BigEnoughString, NoColorsSink, DisplaySink, TokenType}; +pub use self::display::{InstructionFormatter, NoColorsSink, DisplaySink, TokenType}; use core::cmp::PartialEq; use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index 24df133..dcc9aad 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -15,18 
+15,20 @@ use std::fmt::Write; use yaxpeax_arch::{AddressBase, Decoder, LengthedInstruction}; use yaxpeax_x86::long_mode::InstDecoder; +/* #[test] #[cfg(feature="std")] fn test_write_hex_specialization() { use crate::yaxpeax_x86::long_mode::DisplaySink; for i in 0..0xffu8 { - let mut out = yaxpeax_x86::long_mode::BigEnoughString::new(); + let mut out = yaxpeax_x86::long_mode::InstructionFormatter::new(); out.write_char('0').unwrap(); out.write_char('x').unwrap(); out.write_u8(i).unwrap(); assert_eq!(out.into_inner(), format!("0x{:x}", i)); } } +*/ fn test_invalid(data: &[u8]) { test_invalid_under(&InstDecoder::default(), data); @@ -76,13 +78,11 @@ fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str expected ); - let mut text2 = yaxpeax_x86::long_mode::BigEnoughString::new(); + let mut text2 = String::new(); let mut out = yaxpeax_x86::long_mode::NoColorsSink { out: &mut text2, }; instr.write_to(&mut out).expect("printing succeeds"); - core::mem::drop(out); - let text2 = text2.into_inner(); assert!( text2 == text, @@ -94,13 +94,12 @@ fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str text, ); - let mut text3 = yaxpeax_x86::long_mode::BigEnoughString::new(); - instr.write_to(&mut text3).expect("printing succeeds"); - let text3 = text3.into_inner(); + let mut formatter = yaxpeax_x86::long_mode::InstructionFormatter::new(); + let text3 = formatter.format_inst(&instr).expect("printing succeeds"); assert!( text3 == text, - "display error through BigEnoughString for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", + "display error through InstructionFormatter for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", hex, instr, decoder, From 3c8271ae9d45bdae33f0d1d057fb5239c893b6c5 Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 20 Jun 2024 13:03:18 -0700 Subject: [PATCH 55/95] slightly simpler (?) 
write_u* impls --- src/long_mode/display.rs | 168 +++++++++++++++++++++------------------ 1 file changed, 89 insertions(+), 79 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 45c488e..c69e648 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -893,22 +893,27 @@ impl DisplaySink for alloc::string::String { fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. - let mut printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; if printed_size == 0 { - printed_size = 1; + return self.write_fixed_size("0"); } self.reserve(printed_size); let buf = unsafe { self.as_mut_vec() }; - let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; - let mut curr = printed_size; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + loop { let digit = v % 16; let c = c_to_hex(digit as u8); - curr -= 1; unsafe { - p.offset(curr as isize).write(c); + p = p.offset(-1); + p.write(c); } v = v / 16; if v == 0 { @@ -916,10 +921,6 @@ impl DisplaySink for alloc::string::String { } } - unsafe { - buf.set_len(buf.len() + printed_size); - } - Ok(()) } /// write a u16 to the output as a base-16 integer. @@ -931,22 +932,27 @@ impl DisplaySink for alloc::string::String { fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
- let mut printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; if printed_size == 0 { - printed_size = 1; + return self.write_fixed_size("0"); } self.reserve(printed_size); let buf = unsafe { self.as_mut_vec() }; - let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; - let mut curr = printed_size; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + loop { let digit = v % 16; let c = c_to_hex(digit as u8); - curr -= 1; unsafe { - p.offset(curr as isize).write(c); + p = p.offset(-1); + p.write(c); } v = v / 16; if v == 0 { @@ -954,10 +960,6 @@ impl DisplaySink for alloc::string::String { } } - unsafe { - buf.set_len(buf.len() + printed_size); - } - Ok(()) } /// write a u32 to the output as a base-16 integer. @@ -969,22 +971,27 @@ impl DisplaySink for alloc::string::String { fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
- let mut printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; if printed_size == 0 { - printed_size = 1; + return self.write_fixed_size("0"); } self.reserve(printed_size); let buf = unsafe { self.as_mut_vec() }; - let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; - let mut curr = printed_size; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + loop { let digit = v % 16; let c = c_to_hex(digit as u8); - curr -= 1; unsafe { - p.offset(curr as isize).write(c); + p = p.offset(-1); + p.write(c); } v = v / 16; if v == 0 { @@ -992,10 +999,6 @@ impl DisplaySink for alloc::string::String { } } - unsafe { - buf.set_len(buf.len() + printed_size); - } - Ok(()) } /// write a u64 to the output as a base-16 integer. @@ -1007,22 +1010,27 @@ impl DisplaySink for alloc::string::String { fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
- let mut printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; if printed_size == 0 { - printed_size = 1; + return self.write_fixed_size("0"); } self.reserve(printed_size); let buf = unsafe { self.as_mut_vec() }; - let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; - let mut curr = printed_size; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + loop { let digit = v % 16; let c = c_to_hex(digit as u8); - curr -= 1; unsafe { - p.offset(curr as isize).write(c); + p = p.offset(-1); + p.write(c); } v = v / 16; if v == 0 { @@ -1030,10 +1038,6 @@ impl DisplaySink for alloc::string::String { } } - unsafe { - buf.set_len(buf.len() + printed_size); - } - Ok(()) } fn span_start(&mut self, _ty: TokenType) {} @@ -1047,7 +1051,7 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { let new_bytes = s.as_bytes(); if new_bytes.len() == 0 { - unsafe { unreachable_kinda_unchecked() } + return Ok(()); } if new_bytes.len() >= 16 { @@ -1279,20 +1283,25 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
- let mut printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; if printed_size == 0 { - printed_size = 1; + return self.write_fixed_size("0"); } let buf = unsafe { self.buf.as_mut_vec() }; - let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; - let mut curr = printed_size; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + loop { let digit = v % 16; let c = c_to_hex(digit as u8); - curr -= 1; unsafe { - p.offset(curr as isize).write(c); + p = p.offset(-1); + p.write(c); } v = v / 16; if v == 0 { @@ -1300,10 +1309,6 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { } } - unsafe { - buf.set_len(buf.len() + printed_size); - } - Ok(()) } /// write a u16 to the output as a base-16 integer. @@ -1315,20 +1320,25 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
- let mut printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; if printed_size == 0 { - printed_size = 1; + return self.write_fixed_size("0"); } let buf = unsafe { self.buf.as_mut_vec() }; - let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; - let mut curr = printed_size; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + loop { let digit = v % 16; let c = c_to_hex(digit as u8); - curr -= 1; unsafe { - p.offset(curr as isize).write(c); + p = p.offset(-1); + p.write(c); } v = v / 16; if v == 0 { @@ -1336,10 +1346,6 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { } } - unsafe { - buf.set_len(buf.len() + printed_size); - } - Ok(()) } /// write a u32 to the output as a base-16 integer. @@ -1351,20 +1357,25 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
- let mut printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; if printed_size == 0 { - printed_size = 1; + return self.write_fixed_size("0"); } let buf = unsafe { self.buf.as_mut_vec() }; - let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; - let mut curr = printed_size; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + loop { let digit = v % 16; let c = c_to_hex(digit as u8); - curr -= 1; unsafe { - p.offset(curr as isize).write(c); + p = p.offset(-1); + p.write(c); } v = v / 16; if v == 0 { @@ -1372,10 +1383,6 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { } } - unsafe { - buf.set_len(buf.len() + printed_size); - } - Ok(()) } /// write a u64 to the output as a base-16 integer. @@ -1387,20 +1394,25 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
- let mut printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; if printed_size == 0 { - printed_size = 1; + return self.write_fixed_size("0"); } let buf = unsafe { self.buf.as_mut_vec() }; - let p = unsafe { buf.as_mut_ptr().offset(buf.len() as isize) }; - let mut curr = printed_size; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + loop { let digit = v % 16; let c = c_to_hex(digit as u8); - curr -= 1; unsafe { - p.offset(curr as isize).write(c); + p = p.offset(-1); + p.write(c); } v = v / 16; if v == 0 { @@ -1408,8 +1420,6 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { } } - unsafe { buf.set_len(buf.len() + printed_size); } - Ok(()) } fn span_start(&mut self, _ty: TokenType) {} From 70f767370feb9ca056e4baf32f37c6d8d8235e0c Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 20 Jun 2024 13:18:13 -0700 Subject: [PATCH 56/95] swap printed size check and lzcnt if printed_size == 0 then the value must be 0, but we can check if the value is 0 before doing all that stuff --- src/long_mode/display.rs | 48 ++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index c69e648..ca5e580 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -891,12 +891,12 @@ impl DisplaySink for alloc::string::String { /// followup step) #[inline(always)] fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; - if printed_size == 0 { - return self.write_fixed_size("0"); - } self.reserve(printed_size); @@ -930,12 +930,12 @@ impl DisplaySink for alloc::string::String { /// followup step) #[inline(always)] fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; - if printed_size == 0 { - return self.write_fixed_size("0"); - } self.reserve(printed_size); @@ -969,12 +969,12 @@ impl DisplaySink for alloc::string::String { /// followup step) #[inline(always)] fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; - if printed_size == 0 { - return self.write_fixed_size("0"); - } self.reserve(printed_size); @@ -1008,12 +1008,12 @@ impl DisplaySink for alloc::string::String { /// followup step) #[inline(always)] fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; - if printed_size == 0 { - return self.write_fixed_size("0"); - } self.reserve(printed_size); @@ -1281,12 +1281,12 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { /// followup step) #[inline(always)] fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; - if printed_size == 0 { - return self.write_fixed_size("0"); - } let buf = unsafe { self.buf.as_mut_vec() }; let new_len = buf.len() + printed_size; @@ -1318,12 +1318,12 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { /// followup step) #[inline(always)] fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; - if printed_size == 0 { - return self.write_fixed_size("0"); - } let buf = unsafe { self.buf.as_mut_vec() }; let new_len = buf.len() + printed_size; @@ -1355,12 +1355,12 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { /// followup step) #[inline(always)] fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; - if printed_size == 0 { - return self.write_fixed_size("0"); - } let buf = unsafe { self.buf.as_mut_vec() }; let new_len = buf.len() + printed_size; @@ -1392,12 +1392,12 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { /// followup step) #[inline(always)] fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; - if printed_size == 0 { - return self.write_fixed_size("0"); - } let buf = unsafe { self.buf.as_mut_vec() }; let new_len = buf.len() + printed_size; From e39d6b576da2f25490bf739b61fc8c9f3ab7c2ec Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 21 Jun 2024 01:32:12 -0700 Subject: [PATCH 57/95] separate out display code further, reword comments on InstructionTextSink to be ... stern --- src/display.rs | 1020 ++++++++++++++++++++++++++++++++ src/lib.rs | 2 + src/long_mode/display.rs | 1192 ++++---------------------------------- src/long_mode/mod.rs | 2 - 4 files changed, 1122 insertions(+), 1094 deletions(-) create mode 100644 src/display.rs diff --git a/src/display.rs b/src/display.rs new file mode 100644 index 0000000..e495aee --- /dev/null +++ b/src/display.rs @@ -0,0 +1,1020 @@ +use core::fmt; + +use crate::safer_unchecked::unreachable_kinda_unchecked; + +extern crate alloc; + +// TODO: find a better place to put this.... 
+fn c_to_hex(c: u8) -> u8 { + /* + static CHARSET: &'static [u8; 16] = b"0123456789abcdef"; + CHARSET[c as usize] + */ + // the conditional branch below is faster than a lookup, yes + if c < 10 { + b'0' + c + } else { + b'a' + c - 10 + } +} + +pub enum TokenType { + Mnemonic, + Operand, + Immediate, + Register, + Offset, +} + +/// `DisplaySink` allows client code to collect output and minimal markup. this is currently used +/// in formatting instructions for two reasons: +/// * `DisplaySink` implementations have the opportunity to collect starts and ends of tokens at +/// the same time as collecting output itself. +/// * `DisplaySink` implementations provide specialized functions for writing strings in +/// circumstances where a simple "use `core::fmt`" might incur unwanted overhead. +/// +/// spans are reported through `span_start` and `span_end` to avoid constraining implementations +/// into tracking current output offset (which may not be knowable) or span size (which may be +/// knowable, but incur additional overhead to compute or track). +/// +/// spans are entered and exited in a FILO manner: a function writing to some `DisplaySink` must +/// exit spans in reverse order to when they are entered. a function sequence like +/// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_end(Operand)` is in error. +/// +/// the `write_*` helpers on `DisplaySink` may be able to take advantage of constraints described in +/// documentation here to better support writing some kinds of inputs than a fully-general solution +/// (such as `core::fmt`) might be able to yield.
+/// +/// currently there are two motivating factors for `write_*` helpers: +/// +/// instruction formatting often involves writing small but variable-size strings, such as register +/// names, which is something of a pathological case for string appending as Rust currently exists: +/// this often becomes `memcpy` and specifically a call to the platform's `memcpy` (rather than an +/// inlined `rep movsb`) just to move 3-5 bytes. one relevant Rust issue for reference: +/// https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 +/// +/// there are similar papercuts around formatting integers as base-16 numbers, such as +/// https://github.com/rust-lang/rust/pull/122770 . in isolation and in most applications these are +/// not a significant source of overhead. but for programs bounded on decoding and printing +/// instructions, these can add up to significant overhead - on the order of 10-20% of total +/// runtime. +/// +/// `DisplaySink` +pub trait DisplaySink: fmt::Write { + #[inline(always)] + fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.write_str(s) + } + + /// write a string to this sink that is less than 32 bytes. this is provided for optimization + /// opportunities when writing a variable-length string with known max size. + /// + /// SAFETY: the provided `s` must be less than 32 bytes. if the provided string is longer than + /// 31 bytes, implementations may only copy part of a multi-byte codepoint while writing to a + /// utf-8 string. this may corrupt Rust strings. + unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.write_str(s) + } + /// write a string to this sink that is less than 16 bytes. this is provided for optimization + /// opportunities when writing a variable-length string with known max size. + /// + /// SAFETY: the provided `s` must be less than 16 bytes. 
if the provided string is longer than + /// 15 bytes, implementations may only copy part of a multi-byte codepoint while writing to a + /// utf-8 string. this may corrupt Rust strings. + unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.write_str(s) + } + /// write a string to this sink that is less than 8 bytes. this is provided for optimization + /// opportunities when writing a variable-length string with known max size. + /// + /// SAFETY: the provided `s` must be less than 8 bytes. if the provided string is longer than + /// 7 bytes, implementations may only copy part of a multi-byte codepoint while writing to a + /// utf-8 string. this may corrupt Rust strings. + unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.write_str(s) + } + + /// write a u8 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { + write!(self, "{:x}", v) + } + /// write a u16 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { + write!(self, "{:x}", v) + } + /// write a u32 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> { + write!(self, "{:x}", v) + } + /// write a u64 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> { + write!(self, "{:x}", v) + } + /// enter a region inside which output corresponds to a `ty`. + /// + /// the default implementation of these functions is as a no-op. this way, providing span + /// information to a `DisplaySink` that does not want it is eliminated at compile time. + /// + /// spans are entered and ended in a FILO manner: a function writing to some `DisplaySink` must + /// end spans in reverse order to when they are entered. a function sequence like + /// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_end(Operand)` is in error. + /// + /// a simple use of `span_start`/`span_end` might look something like: + /// ```compile_fail + /// sink.span_start(Operand) + /// sink.write_char('[') + /// sink.span_start(Register) + /// sink.write_fixed_size("rbp") + /// sink.span_end(Register) + /// sink.write_char(']') + /// sink.span_end(Operand) + /// ``` + /// which writes the text `[rbp]`, with span indicators where the operand (`[ ... ]`) begins, + /// as well as the start and end of a register name. + fn span_start(&mut self, _ty: TokenType) { } + /// end a region where a `ty` was written. see docs on [`DisplaySink::span_start`] for more. 
+ fn span_end(&mut self, _ty: TokenType) { } +} + +pub struct NoColorsSink<'a, T: fmt::Write> { + pub out: &'a mut T, +} + +impl<'a, T: fmt::Write> DisplaySink for NoColorsSink<'a, T> { + fn span_start(&mut self, _ty: TokenType) { } + fn span_end(&mut self, _ty: TokenType) { } +} + +impl<'a, T: fmt::Write> fmt::Write for NoColorsSink<'a, T> { + fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.out.write_str(s) + } + fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { + self.out.write_char(c) + } + fn write_fmt(&mut self, f: fmt::Arguments) -> Result<(), core::fmt::Error> { + self.out.write_fmt(f) + } +} + +/// this is an implementation detail of yaxpeax-arch and related crates. if you are a user of the +/// disassemblers, do not use this struct. do not depend on this struct existing. this struct is +/// not stable. this struct is not safe for general use. if you use this struct you and your +/// program will be eaten by gremlins. +/// +/// if you are implementing an instruction formatter for the yaxpeax family of crates: this struct +/// is guaranteed to contain a string that is long enough to hold a fully-formatted instruction. +/// because the buffer is guaranteed to be long enough, writes through `InstructionTextSink` are +/// not bounds-checked, and the buffer is never grown. +/// +/// this is wildly dangerous in general use. the public constructor of `InstructionTextSink` is +/// unsafe as a result. as used in `InstructionFormatter`, the buffer is guaranteed to be +/// `clear()`ed before use, `InstructionFormatter` ensures the buffer is large enough, *and* +/// `InstructionFormatter` never allows `InstructionTextSink` to exist in a context where it would +/// be written to without being rewound first. +/// +/// because this opens a very large hole through which `fmt::Write` can become unsafe, incorrect +/// uses of this struct will be hard to debug in general. 
`InstructionFormatter` is probably at the +/// limit of easily-reasoned-about lifecycle of the buffer, which "only" leaves the problem of +/// ensuring that instruction formatting impls this buffer is passed to are appropriately sized. +/// +/// this is intended to be hidden in docs. if you see this in docs, it's a bug. +#[doc(hidden)] +pub(crate) struct InstructionTextSink<'buf> { + buf: &'buf mut alloc::string::String +} + +impl<'buf> InstructionTextSink<'buf> { + /// wrap `buf` in a sink that appends to it. + /// + /// SAFETY: `write_char` and the `DisplaySink` helpers of this sink write past `buf.len()` + /// without bounds checks and without growing `buf`. `buf` must already have enough spare + /// capacity for everything written that way through the returned sink. + pub unsafe fn new(buf: &'buf mut alloc::string::String) -> Self { + Self { buf } + } +} + +impl<'buf> fmt::Write for InstructionTextSink<'buf> { + fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.buf.write_str(s) + } + fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { + // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()` + // is valid for writing, but may be uninitialized. + // + // this function is essentially equivalent to `Vec::push` specialized for the case that + // `len < buf.capacity()`: + // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006 + unsafe { + let underlying = self.buf.as_mut_vec(); + // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to + // write single ASCII characters. any other `char` would be truncated by the `c as u8` + // below and could leave invalid utf-8 in `buf`, so debug builds reject it loudly. + if cfg!(debug_assertions) && c > '\x7f' { + panic!("InstructionTextSink::write_char would truncate output"); + } + let to_push = c as u8; + // `ptr::write` here because `underlying.add(underlying.len())` may not point to an + // initialized value, which would mean that turning that pointer into a `&mut u8` to + // store through would be UB. `ptr::write` avoids taking the mut ref. + underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push); + // we have initialized all (one) bytes that `set_len` is increasing the length to + // include.
+ underlying.set_len(underlying.len() + 1); + } + Ok(()) + } +} + +/// this DisplaySink impl exists to support somewhat more performant buffering of the kinds of +/// strings `yaxpeax-x86` uses in formatting instructions. +impl DisplaySink for alloc::string::String { + #[inline(always)] + fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.reserve(s.len()); + // SAFETY: only the bytes of `s`, a valid `&str`, are appended through this `Vec`. + let buf = unsafe { self.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + // nothing to write for an empty `s`. returning here (as the `InstructionTextSink` impl of + // `write_fixed_size` does) also keeps the unconditional `new_bytes[0]` below in bounds. + if new_bytes.len() == 0 { + return Ok(()); + } + + if new_bytes.len() >= 16 { + unsafe { unreachable_kinda_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + + // this used to be enough to bamboozle llvm away from + // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 + // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped + // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` + // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this + // unrolls into some kind of appropriate series of `mov`.
+ dest.offset(0 as isize).write(new_bytes[0]); + for i in 1..new_bytes.len() { + dest.offset(i as isize).write(new_bytes[i]); + } + + buf.set_len(buf.len() + new_bytes.len()); + } + + Ok(()) + } + unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { + self.reserve(s.len()); + + // SAFETY: the only bytes appended through this `Vec` are the bytes of `s`, a valid `&str`, + // so the `String` stays valid utf-8 as long as the caller upholds the length contract. + // `reserve` above guarantees the capacity to append them. + let buf = unsafe { self.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + // should get DCE + if new_bytes.len() >= 32 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let rem = new_bytes.len() as isize; + + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + + // copy the `rem` bytes of `s` back-to-front, in chunks of 16, 8, 4, 2 and 1 bytes. the + // labels avoid numbers made only of the digits 0 and 1, which can be misparsed as binary + // literals in x86 intel-syntax inline asm. + core::arch::asm!( + "6:", + "cmp {rem:e}, 16", + "jb 7f", + "mov {buf:r}, qword ptr [{src} + {rem} - 16]", + "mov qword ptr [{dest} + {rem} - 16], {buf:r}", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 16", + "jz 13f", + "7:", + "cmp {rem:e}, 8", + "jb 8f", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 8", + "jz 13f", + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 13f", + "9:", + "cmp {rem:e}, 2", + "jb 12f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 13f", + "12:", + "cmp {rem:e}, 1", + "jb 13f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "13:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, + buf = out(reg) _, + options(nostack), + ); + } + /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as
isize).write_volatile(new_bytes[i]); + } + } + */ + + Ok(()) + } + unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { + self.reserve(s.len()); + + // SAFETY: the only bytes appended through this `Vec` are the bytes of `s`, a valid `&str`, + // so the `String` stays valid utf-8 as long as the caller upholds the length contract. + // `reserve` above guarantees the capacity to append them. + let buf = unsafe { self.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + // should get DCE + if new_bytes.len() >= 16 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let rem = new_bytes.len() as isize; + + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + + // copy the `rem` bytes of `s` back-to-front, in chunks of 8, 4, 2 and 1 bytes. the labels + // avoid numbers made only of the digits 0 and 1, which can be misparsed as binary + // literals in x86 intel-syntax inline asm. + core::arch::asm!( + "7:", + "cmp {rem:e}, 8", + "jb 8f", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 8", + "jz 13f", + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 13f", + "9:", + "cmp {rem:e}, 2", + "jb 12f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 13f", + "12:", + "cmp {rem:e}, 1", + "jb 13f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "13:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, + buf = out(reg) _, + options(nostack), + ); + } + /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); + } + } + */ + + Ok(()) + } + unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { + self.reserve(s.len()); + + // SAFETY: the only bytes appended through this `Vec` are the bytes of `s`, a valid `&str`, + // so the `String` stays valid utf-8 as long as the caller upholds the length contract. + // `reserve` above guarantees the capacity to append them. + let buf = unsafe { self.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + // should get DCE + if new_bytes.len() >= 8 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); +
let src = new_bytes.as_ptr(); + + let rem = new_bytes.len() as isize; + + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + + // copy the `rem` bytes of `s` back-to-front, in chunks of 4, 2 and 1 bytes. the labels + // avoid numbers made only of the digits 0 and 1, which can be misparsed as binary + // literals in x86 intel-syntax inline asm. + core::arch::asm!( + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 13f", + "9:", + "cmp {rem:e}, 2", + "jb 12f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 13f", + "12:", + "cmp {rem:e}, 1", + "jb 13f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "13:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, + buf = out(reg) _, + options(nostack), + ); + } + /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); + } + } + */ + + Ok(()) + } + /// write a u8 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string.
+ let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + + self.reserve(printed_size); + + let buf = unsafe { self.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + /// write a u16 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + + self.reserve(printed_size); + + let buf = unsafe { self.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + /// write a u32 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + + self.reserve(printed_size); + + let buf = unsafe { self.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + /// write a u64 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. 
+ let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + + self.reserve(printed_size); + + let buf = unsafe { self.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + fn span_start(&mut self, _ty: TokenType) {} + fn span_end(&mut self, _ty: TokenType) {} +} + +impl<'buf> DisplaySink for InstructionTextSink<'buf> { + #[inline(always)] + fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { + let buf = unsafe { self.buf.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + if new_bytes.len() == 0 { + return Ok(()); + } + + if new_bytes.len() >= 16 { + unsafe { unreachable_kinda_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + + // this used to be enough to bamboozle llvm away from + // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 + // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped + // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` + // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this + // unrolls into some kind of appropriate series of `mov`.
+ dest.offset(0 as isize).write(new_bytes[0]); + for i in 1..new_bytes.len() { + dest.offset(i as isize).write(new_bytes[i]); + } + + buf.set_len(buf.len() + new_bytes.len()); + } + + Ok(()) + } + unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { + // SAFETY: the only bytes appended through this `Vec` are the bytes of `s`, a valid `&str`, + // so the `String` stays valid utf-8 as long as the caller upholds the length contract. + // nothing is reserved here: `buf` is assumed to already be large enough. + let buf = unsafe { self.buf.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + // should get DCE + if new_bytes.len() >= 32 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let rem = new_bytes.len() as isize; + + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + + // copy the `rem` bytes of `s` back-to-front, in chunks of 16, 8, 4, 2 and 1 bytes. the + // labels avoid numbers made only of the digits 0 and 1, which can be misparsed as binary + // literals in x86 intel-syntax inline asm. + core::arch::asm!( + "6:", + "cmp {rem:e}, 16", + "jb 7f", + "mov {buf:r}, qword ptr [{src} + {rem} - 16]", + "mov qword ptr [{dest} + {rem} - 16], {buf:r}", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 16", + "jz 13f", + "7:", + "cmp {rem:e}, 8", + "jb 8f", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 8", + "jz 13f", + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 13f", + "9:", + "cmp {rem:e}, 2", + "jb 12f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 13f", + "12:", + "cmp {rem:e}, 1", + "jb 13f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "13:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, + buf = out(reg) _, + options(nostack), + ); + } + /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); + } + } + */ + +
Ok(()) + } + unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { + // SAFETY: the only bytes appended through this `Vec` are the bytes of `s`, a valid `&str`, + // so the `String` stays valid utf-8 as long as the caller upholds the length contract. + // nothing is reserved here: `buf` is assumed to already be large enough. + let buf = unsafe { self.buf.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + // should get DCE + if new_bytes.len() >= 16 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let rem = new_bytes.len() as isize; + + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + + // copy the `rem` bytes of `s` back-to-front, in chunks of 8, 4, 2 and 1 bytes. the labels + // avoid numbers made only of the digits 0 and 1, which can be misparsed as binary + // literals in x86 intel-syntax inline asm. + core::arch::asm!( + "7:", + "cmp {rem:e}, 8", + "jb 8f", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 8", + "jz 13f", + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 13f", + "9:", + "cmp {rem:e}, 2", + "jb 12f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 13f", + "12:", + "cmp {rem:e}, 1", + "jb 13f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "13:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, + buf = out(reg) _, + options(nostack), + ); + } + /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); + } + } + */ + + Ok(()) + } + unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { + // SAFETY: the only bytes appended through this `Vec` are the bytes of `s`, a valid `&str`, + // so the `String` stays valid utf-8 as long as the caller upholds the length contract. + // nothing is reserved here: `buf` is assumed to already be large enough. + let buf = unsafe { self.buf.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + // should get DCE + if new_bytes.len() >= 8 { + unsafe { core::hint::unreachable_unchecked() } + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let rem = new_bytes.len() as isize; + + // set_len early because
there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + + // copy the `rem` bytes of `s` back-to-front, in chunks of 4, 2 and 1 bytes. the labels + // avoid numbers made only of the digits 0 and 1, which can be misparsed as binary + // literals in x86 intel-syntax inline asm. + core::arch::asm!( + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 13f", + "9:", + "cmp {rem:e}, 2", + "jb 12f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 13f", + "12:", + "cmp {rem:e}, 1", + "jb 13f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "13:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, + buf = out(reg) _, + options(nostack), + ); + } + /* + for i in 0..new_bytes.len() { + unsafe { + buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); + } + } + */ + + Ok(()) + } + /// write a u8 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string.
+ let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + + let buf = unsafe { self.buf.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + /// write a u16 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + + let buf = unsafe { self.buf.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + /// write a u32 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + + let buf = unsafe { self.buf.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + /// write a u64 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. 
+ let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + + let buf = unsafe { self.buf.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + unsafe { + buf.set_len(new_len); + } + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = c_to_hex(digit as u8); + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + fn span_start(&mut self, _ty: TokenType) {} + fn span_end(&mut self, _ty: TokenType) {} +} diff --git a/src/lib.rs b/src/lib.rs index a7b8531..709563b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -139,6 +139,8 @@ pub mod real_mode; pub use real_mode::Arch as x86_16; mod safer_unchecked; +#[cfg(feature = "fmt")] +mod display; const MEM_SIZE_STRINGS: [&'static str; 65] = [ "BUG", diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index ca5e580..d9799e1 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -4,10 +4,12 @@ use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; use yaxpeax_arch::display::*; use crate::safer_unchecked::GetSaferUnchecked as _; -use crate::safer_unchecked::unreachable_kinda_unchecked; use crate::MEM_SIZE_STRINGS; use crate::long_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixRex, OperandSpec}; +use crate::display::DisplaySink; +use crate::display::TokenType; + impl fmt::Display for InstDecoder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { if self == &InstDecoder::default() { @@ -363,1069 +365,6 @@ impl Colorize for Operand { } } -pub enum TokenType { - Mnemonic, - Operand, - Immediate, - Register, - Offset, -} - -/// `DisplaySink` allows client code to collect output and minimal markup. this is currently used -/// in formatting instructions for two reasons: -/// * `DisplaySink` implementations have the opportunity to collect starts and ends of tokens at -/// the same time as collecting output itself. 
-/// * `DisplaySink` implementations provides specialized functions for writing strings in -/// circumstances where a simple "use `core::fmt`" might incur unwanted overhead. -/// -/// spans are reported through `span_start` and `span_exit` to avoid constraining implementations -/// into tracking current output offset (which may not be knowable) or span size (which may be -/// knowable, but incur additional overhead to compute or track). -/// -/// spans are entered and exited in a FILO manner: a function writing to some `DisplaySink` must -/// exit spans in reverse order to when they are entered. a function sequence like -/// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_exit(Operand)` is in error. -/// -/// the `write_*` helpers on `DisplaySink` may be able to take advantage of contraints described in -/// documentation here to better support writing some kinds of inputs than a fully-general solution -/// (such as `core::fmt`) might be able to yield. -/// -/// currently there are two motivating factors for `write_*` helpers: -/// -/// instruction formatting often involves writing small but variable-size strings, such as register -/// names, which is something of a pathological case for string appending as Rust currently exists: -/// this often becomes `memcpy` and specifically a call to the platform's `memcpy` (rather than an -/// inlined `rep movsb`) just to move 3-5 bytes. one relevant Rust issue for reference: -/// https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 -/// -/// there are similar papercuts around formatting integers as base-16 numbers, such as -/// https://github.com/rust-lang/rust/pull/122770 . in isolation and in most applications these are -/// not a significant source of overhead. but for programs bounded on decoding and printing -/// instructions, these can add up to significant overhead - on the order of 10-20% of total -/// runtime. 
-/// -/// `DisplaySink` -pub trait DisplaySink: fmt::Write { - #[inline(always)] - fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.write_str(s) - } - - /// write a string to this sink that is less than 32 bytes. this is provided for optimization - /// opportunities when writing a variable-length string with known max size. - /// - /// SAFETY: the provided `s` must be less than 32 bytes. if the provided string is longer than - /// 31 bytes, implementations may only copy part of a multi-byte codepoint while writing to a - /// utf-8 string. this may corrupt Rust strings. - unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.write_str(s) - } - /// write a string to this sink that is less than 16 bytes. this is provided for optimization - /// opportunities when writing a variable-length string with known max size. - /// - /// SAFETY: the provided `s` must be less than 16 bytes. if the provided string is longer than - /// 15 bytes, implementations may only copy part of a multi-byte codepoint while writing to a - /// utf-8 string. this may corrupt Rust strings. - unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.write_str(s) - } - /// write a string to this sink that is less than 8 bytes. this is provided for optimization - /// opportunities when writing a variable-length string with known max size. - /// - /// SAFETY: the provided `s` must be less than 8 bytes. if the provided string is longer than - /// 7 bytes, implementations may only copy part of a multi-byte codepoint while writing to a - /// utf-8 string. this may corrupt Rust strings. - unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.write_str(s) - } - - /// write a u8 to the output as a base-16 integer. 
- /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { - write!(self, "{:x}", v) - } - /// write a u16 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { - write!(self, "{:x}", v) - } - /// write a u32 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - fn write_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> { - write!(self, "{:x}", v) - } - /// write a u64 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - fn write_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> { - write!(self, "{:x}", v) - } - /// enter a region inside which output corresponds to a `ty`. - /// - /// the default implementation of these functions is as a no-op. this way, providing span - /// information to a `DisplaySink` that does not want it is eliminated at compile time. - /// - /// spans are entered and ended in a FILO manner: a function writing to some `DisplaySink` must - /// end spans in reverse order to when they are entered. a function sequence like - /// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_end(Operand)` is in error. 
- /// - /// a simple use of `span_start`/`span_end` might look something like: - /// ```compile_fail - /// sink.span_start(Operand) - /// sink.write_char('[') - /// sink.span_start(Register) - /// sink.write_fixed_size("rbp") - /// sink.span_end(Register) - /// sink.write_char(']') - /// sink.span_end(Operand) - /// ``` - /// which writes the text `[rbp]`, with span indicators where the operand (`[ ... ]`) begins, - /// as well as the start and end of a register name. - fn span_start(&mut self, _ty: TokenType) { } - /// end a region where a `ty` was written. see docs on [`DisplaySink::span_start`] for more. - fn span_end(&mut self, _ty: TokenType) { } -} - -pub struct NoColorsSink<'a, T: fmt::Write> { - pub out: &'a mut T, -} - -impl<'a, T: fmt::Write> DisplaySink for NoColorsSink<'a, T> { - fn span_start(&mut self, _ty: TokenType) { } - fn span_end(&mut self, _ty: TokenType) { } -} - -impl<'a, T: fmt::Write> fmt::Write for NoColorsSink<'a, T> { - fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.out.write_str(s) - } - fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { - self.out.write_char(c) - } - fn write_fmt(&mut self, f: fmt::Arguments) -> Result<(), core::fmt::Error> { - self.out.write_fmt(f) - } -} - -/// helper to format `amd64` instructions with highest throughupt and least configuration. -/// -/// ### when to use this over `fmt::Display`? -/// -/// `fmt::Display` is a fair choice in most cases. in some cases, `InstructionFormatter` may -/// support formatting options that may be difficult to configure for a `Display` impl. -/// additionally, `InstructionFormatter` may be able to specialize more effectively where -/// `fmt::Display`, writing to a generic `fmt::Write`, may not. 
-/// -/// if your use case for `yaxpeax-x86` involves being bounded on the speed of disassembling and -/// formatting instructions, [`InstructionFormatter::format_inst`] has been measured as up to 11% -/// faster than an equivalent `write!(buf, "{}", inst)`. -/// -/// `InstructionFormatter` involves internal allocations; if your use case for `yaxpeax-x86` -/// requires allocations never occurring, it is not an appropriate tool. -/// -/// ### example -/// -/// ``` -/// use yaxpeax_x86::long_mode::InstDecoder; -/// use yaxpeax_x86::long_mode::InstructionFormatter; -/// -/// let bytes = &[0x33, 0xc0]; -/// let inst = InstDecoder::default().decode_slice(bytes).expect("can decode"); -/// let mut formatter = InstructionFormatter::new(); -/// assert_eq!( -/// formatter.format_inst(&inst).expect("can format"), -/// "xor eax, eax" -/// ); -/// -/// // or, getting the formatted instruction with `text_str`: -/// assert_eq!( -/// formatter.text_str(), -/// "xor eax, eax" -/// ); -/// ``` -pub struct InstructionFormatter { - content: alloc::string::String, -} - -impl InstructionFormatter { - /// create an `InstructionFormatter` with default settings. `InstructionFormatter`'s default - /// settings format instructions identically to their corresponding `fmt::Display`. - pub fn new() -> Self { - let mut buf = alloc::string::String::new(); - // TODO: move 512 out to a MAX_INSTRUCTION_LEN const and appropriate justification (and - // fuzzing and ..) - buf.reserve(512); - Self { - content: buf, - } - } - - /// format `inst` through this formatter, storing the formatted text in this formatter's - /// internal buffer. returns a borrow of that same internal buffer for convenience. - /// - /// this clears and reuses an internal buffer; if an instruction had been previously formatted - /// through this formatter, it will be overwritten. 
- pub fn format_inst<'formatter>(&'formatter mut self, inst: &Instruction) -> Result<&'formatter str, fmt::Error> { - let mut handle = self.write_handle(); - - inst.write_to(&mut handle)?; - - Ok(self.text_str()) - } - - /// return a borrow of this formatter's buffer. if an instruction has been formatted, the - /// returned `&str` contains that formatted instruction's text. - pub fn text_str(&self) -> &str { - self.content.as_str() - } - - fn write_handle(&mut self) -> InstructionTextSink { - self.content.clear(); - InstructionTextSink { - buf: &mut self.content - } - } -} - -/// this private struct is guaranteed to contain a string that is long enough to hold a -/// fully-formatted x86 instruction. -/// -/// this is wildly dangerous in general use, but because of the constrained lifecycle of -/// `InstructionTextSink` in the context of `InstructionFormatter`, it's OK to use here. it is -/// wildly dangerous because writing to this sink does not bounds check and assumes the contained -/// `buf` is large enough for any input. as an example: if `buf` did not have enough space -/// available from its current position, the `write_*` methods would write into whatever happens to -/// be after `buf` in memory. -/// -/// don't make this pub. if this is pub in docs, it's a bug. -struct InstructionTextSink<'buf> { - buf: &'buf mut alloc::string::String -} - -impl<'buf> fmt::Write for InstructionTextSink<'buf> { - fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.buf.write_str(s) - } - fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { - // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()` - // is valid for writing, but may be uninitialized. 
- // - // this function is essentially equivalent to `Vec::push` specialized for the case that - // `len < buf.capacity()`: - // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006 - unsafe { - let underlying = self.buf.as_mut_vec(); - // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to - // write single ASCII characters. this is wrong in the general case, but `write_char` - // here is not going to be used in the general case. - if cfg!(debug_asertions) { - panic!("InstructionTextSink::write_char would truncate output"); - } - let to_push = c as u8; - // `ptr::write` here because `underlying.add(underlying.len())` may not point to an - // initialized value, which would mean that turning that pointer into a `&mut u8` to - // store through would be UB. `ptr::write` avoids taking the mut ref. - underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push); - // we have initialized all (one) bytes that `set_len` is increasing the length to - // include. - underlying.set_len(underlying.len() + 1); - } - Ok(()) - } -} - -/// this DisplaySink impl exists to support somewhat more performant buffering of the kinds of -/// strings `yaxpeax-x86` uses in formatting instructions. -impl DisplaySink for alloc::string::String { - #[inline(always)] - fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.reserve(s.len()); - let buf = unsafe { self.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - if new_bytes.len() == 0 { - unsafe { unreachable_kinda_unchecked() } - } - - if new_bytes.len() >= 16 { - unsafe { unreachable_kinda_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - - // this used to be enough to bamboozle llvm away from - // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 - // if `s` is not fixed size. 
somewhere between Rust 1.68 and Rust 1.74 this stopped - // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` - // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this - // unrolls into some kind of appropriate series of `mov`. - dest.offset(0 as isize).write(new_bytes[0]); - for i in 1..new_bytes.len() { - dest.offset(i as isize).write(new_bytes[i]); - } - - buf.set_len(buf.len() + new_bytes.len()); - } - - Ok(()) - } - unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { - self.reserve(s.len()); - - // SAFETY: todo - let buf = unsafe { self.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - // should get DCE - if new_bytes.len() >= 32 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let rem = new_bytes.len() as isize; - - // set_len early because there is no way to avoid the following asm!() writing that - // same number of bytes into buf - buf.set_len(buf.len() + new_bytes.len()); - - core::arch::asm!( - "6:", - "cmp {rem:e}, 16", - "jb 7f", - "mov {buf:r}, qword ptr [{src} + {rem} - 16]", - "mov qword ptr [{dest} + {rem} - 16], {buf:r}", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 16", - "jz 11f", - "7:", - "cmp {rem:e}, 8", - "jb 8f", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 8", - "jz 11f", - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + 
{rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, - buf = out(reg) _, - options(nostack), - ); - } - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - - Ok(()) - } - unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { - self.reserve(s.len()); - - // SAFETY: todo - let buf = unsafe { self.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - // should get DCE - if new_bytes.len() >= 16 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let rem = new_bytes.len() as isize; - - // set_len early because there is no way to avoid the following asm!() writing that - // same number of bytes into buf - buf.set_len(buf.len() + new_bytes.len()); - - core::arch::asm!( - "7:", - "cmp {rem:e}, 8", - "jb 8f", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 8", - "jz 11f", - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + {rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, - buf = out(reg) _, - options(nostack), - ); - } - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - - Ok(()) - } - unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { - self.reserve(s.len()); - - // 
SAFETY: todo - let buf = unsafe { self.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - // should get DCE - if new_bytes.len() >= 8 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let rem = new_bytes.len() as isize; - - // set_len early because there is no way to avoid the following asm!() writing that - // same number of bytes into buf - buf.set_len(buf.len() + new_bytes.len()); - - core::arch::asm!( - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + {rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, - buf = out(reg) _, - options(nostack), - ); - } - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - - Ok(()) - } - /// write a u8 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. 
- let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; - - self.reserve(printed_size); - - let buf = unsafe { self.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - /// write a u16 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. - let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; - - self.reserve(printed_size); - - let buf = unsafe { self.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - /// write a u32 to the output as a base-16 integer. 
- /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. - let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; - - self.reserve(printed_size); - - let buf = unsafe { self.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - /// write a u64 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. 
- let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; - - self.reserve(printed_size); - - let buf = unsafe { self.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - fn span_start(&mut self, _ty: TokenType) {} - fn span_end(&mut self, _ty: TokenType) {} -} - -impl<'buf> DisplaySink for InstructionTextSink<'buf> { - #[inline(always)] - fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { - let buf = unsafe { self.buf.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - if new_bytes.len() == 0 { - return Ok(()); - } - - if new_bytes.len() >= 16 { - unsafe { unreachable_kinda_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - - // this used to be enough to bamboozle llvm away from - // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 - // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped - // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` - // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this - // unrolls into some kind of appropriate series of `mov`. 
- dest.offset(0 as isize).write(new_bytes[0]); - for i in 1..new_bytes.len() { - dest.offset(i as isize).write(new_bytes[i]); - } - - buf.set_len(buf.len() + new_bytes.len()); - } - - Ok(()) - } - unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { - // SAFETY: todo - let buf = unsafe { self.buf.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - // should get DCE - if new_bytes.len() >= 32 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let rem = new_bytes.len() as isize; - - // set_len early because there is no way to avoid the following asm!() writing that - // same number of bytes into buf - buf.set_len(buf.len() + new_bytes.len()); - - core::arch::asm!( - "6:", - "cmp {rem:e}, 16", - "jb 7f", - "mov {buf:r}, qword ptr [{src} + {rem} - 16]", - "mov qword ptr [{dest} + {rem} - 16], {buf:r}", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 16", - "jz 11f", - "7:", - "cmp {rem:e}, 8", - "jb 8f", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 8", - "jz 11f", - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + {rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, - buf = out(reg) _, - options(nostack), - ); - } - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - - 
Ok(()) - } - unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { - // SAFETY: todo - let buf = unsafe { self.buf.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - // should get DCE - if new_bytes.len() >= 16 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let rem = new_bytes.len() as isize; - - // set_len early because there is no way to avoid the following asm!() writing that - // same number of bytes into buf - buf.set_len(buf.len() + new_bytes.len()); - - core::arch::asm!( - "7:", - "cmp {rem:e}, 8", - "jb 8f", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 8", - "jz 11f", - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + {rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, - buf = out(reg) _, - options(nostack), - ); - } - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - - Ok(()) - } - unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { - // SAFETY: todo - let buf = unsafe { self.buf.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - // should get DCE - if new_bytes.len() >= 8 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let rem = new_bytes.len() as isize; - - // set_len early because 
there is no way to avoid the following asm!() writing that - // same number of bytes into buf - buf.set_len(buf.len() + new_bytes.len()); - - core::arch::asm!( - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + {rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, - buf = out(reg) _, - options(nostack), - ); - } - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - - Ok(()) - } - /// write a u8 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. 
- let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; - - let buf = unsafe { self.buf.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - /// write a u16 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. - let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; - - let buf = unsafe { self.buf.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - /// write a u32 to the output as a base-16 integer. 
- /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. - let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; - - let buf = unsafe { self.buf.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - /// write a u64 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. 
- let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; - - let buf = unsafe { self.buf.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - fn span_start(&mut self, _ty: TokenType) {} - fn span_end(&mut self, _ty: TokenType) {} -} - struct ColorizingOperandVisitor<'a, T> { f: &'a mut T, } @@ -4772,6 +3711,9 @@ pub struct InstructionDisplayer<'instr> { * * so write to some Write thing i guess. bite me. i really just want to * stop thinking about how to support printing instructions... + * + * UPDATE: really wish i thought of DisplaySink back then, really wish this was bounded as T: + * DisplaySink. */ impl <'instr, T: fmt::Write, Y: YaxColors> Colorize for InstructionDisplayer<'instr> { fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result { @@ -4784,32 +3726,15 @@ impl <'instr, T: fmt::Write, Y: YaxColors> Colorize for InstructionDisplay /// No per-operand context when contextualizing an instruction! struct NoContext; -extern crate alloc; - -// TODO: find a better place to put this.... 
-fn c_to_hex(c: u8) -> u8 { - /* - static CHARSET: &'static [u8; 16] = b"0123456789abcdef"; - CHARSET[c as usize] - */ - // the conditional branch below is faster than a lookup, yes - if c < 10 { - b'0' + c - } else { - b'a' + c - 10 - } -} - impl Instruction { #[cfg_attr(feature="profiling", inline(never))] pub fn write_to(&self, out: &mut T) -> fmt::Result { contextualize_intel(self, out) -// self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) } } #[cfg_attr(feature="profiling", inline(never))] -fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt::Result { +pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt::Result { if instr.xacquire() { out.write_fixed_size("xacquire ")?; } @@ -4951,7 +3876,7 @@ fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt: Ok(()) } -fn contextualize_c(instr: &Instruction, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { +pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) -> fmt::Result { let mut brace_count = 0; let mut prefixed = false; @@ -5286,23 +4211,22 @@ fn contextualize_c(instr: &Instruction, _address: u64, _context: } impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual for InstructionDisplayer<'instr> { - fn contextualize(&self, _colors: &Y, address: u64, context: Option<&NoContext>, out: &mut T) -> fmt::Result { + fn contextualize(&self, _colors: &Y, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { let InstructionDisplayer { instr, style, } = self; + let mut out = crate::display::NoColorsSink { + out: out, + }; + match style { DisplayStyle::Intel => { - let mut out = NoColorsSink { - out, - }; - let out = &mut out; - - contextualize_intel(instr, out) + contextualize_intel(instr, &mut out) } DisplayStyle::C => { - contextualize_c(instr, address, context, out) + contextualize_c(instr, &mut out) } } } @@ -5311,7 +4235,7 @@ impl <'instr, T: fmt::Write, Y: YaxColors> 
ShowContextual #[cfg(feature="std")] impl ShowContextual], T, Y> for Instruction { fn contextualize(&self, colors: &Y, _address: u64, context: Option<&[Option]>, out: &mut T) -> fmt::Result { - let mut out = NoColorsSink { + let mut out = crate::display::NoColorsSink { out, }; let out = &mut out; @@ -5528,3 +4452,87 @@ impl<'a, F: DisplaySink> crate::long_mode::OperandVisitor for RelativeBranchPrin } } +/// helper to format `amd64` instructions with highest throughput and least configuration. this is +/// functionally a buffer for one x86 instruction's text. +/// +/// ### when to use this over `fmt::Display`? +/// +/// `fmt::Display` is a fair choice in most cases. in some cases, `InstructionTextBuffer` may +/// support formatting options that may be difficult to configure for a `Display` impl. +/// additionally, `InstructionTextBuffer` may be able to specialize more effectively where +/// `fmt::Display`, writing to a generic `fmt::Write`, may not. +/// +/// if your use case for `yaxpeax-x86` involves being bounded on the speed of disassembling and +/// formatting instructions, [`InstructionTextBuffer::format_inst`] has been measured as up to 11% +/// faster than an equivalent `write!(buf, "{}", inst)`. +/// +/// `InstructionTextBuffer` involves internal allocations; if your use case for `yaxpeax-x86` +/// requires allocations never occurring, it is not an appropriate tool. 
+/// +/// ### example +/// +/// ``` +/// use yaxpeax_x86::long_mode::InstDecoder; +/// use yaxpeax_x86::long_mode::InstructionTextBuffer; +/// +/// let bytes = &[0x33, 0xc0]; +/// let inst = InstDecoder::default().decode_slice(bytes).expect("can decode"); +/// let mut text_buf = InstructionTextBuffer::new(); +/// assert_eq!( +/// text_buf.format_inst(&inst).expect("can format"), +/// "xor eax, eax" +/// ); +/// +/// // or, getting the formatted instruction with `text_str`: +/// assert_eq!( +/// text_buf.text_str(), +/// "xor eax, eax" +/// ); +/// ``` +pub struct InstructionTextBuffer { + content: alloc::string::String, +} + +impl InstructionTextBuffer { + /// create an `InstructionTextBuffer` with default settings. `InstructionTextBuffer`'s default + /// settings format instructions identically to their corresponding `fmt::Display`. + pub fn new() -> Self { + let mut buf = alloc::string::String::new(); + // TODO: move 512 out to a MAX_INSTRUCTION_LEN const and appropriate justification (and + // fuzzing and ..) + buf.reserve(512); + Self { + content: buf, + } + } + + /// format `inst` into this buffer. returns a borrow of that same internal buffer for convenience. + /// + /// this clears and reuses an internal buffer; if an instruction had been previously formatted + /// through this buffer, it will be overwritten. + pub fn format_inst<'buf, 'instr>(&'buf mut self, display: &InstructionDisplayer<'instr>) -> Result<&'buf str, fmt::Error> { + let mut handle = self.write_handle(); + + match display.style { + DisplayStyle::Intel => { + contextualize_intel(&display.instr, &mut handle)?; + } + DisplayStyle::C => { + contextualize_c(&display.instr, &mut handle)?; + } + } + + Ok(self.text_str()) + } + + /// return a borrow of the internal buffer. if an instruction has been formatted, the + /// returned `&str` contains that instruction's buffered text. 
+ pub fn text_str(&self) -> &str { + self.content.as_str() + } + + fn write_handle(&mut self) -> crate::display::InstructionTextSink { + self.content.clear(); + crate::display::InstructionTextSink::new(&mut self.content) + } +} diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 44ed89f..418d57f 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -8,8 +8,6 @@ pub use crate::MemoryAccessSize; #[cfg(feature = "fmt")] pub use self::display::{DisplayStyle, InstructionDisplayer}; -#[cfg(feature = "fmt")] -pub use self::display::{InstructionFormatter, NoColorsSink, DisplaySink, TokenType}; use core::cmp::PartialEq; use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; From d16cc79d7b7091f67328a0080634ce6cd4880dbd Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 21 Jun 2024 02:05:24 -0700 Subject: [PATCH 58/95] things compile again, add a few more caution signs around InstructionTextBuffer --- src/display.rs | 56 ++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 +- src/long_mode/display.rs | 17 +++++++++--- src/long_mode/mod.rs | 2 +- test/long_mode/mod.rs | 8 +++--- 5 files changed, 76 insertions(+), 9 deletions(-) diff --git a/src/display.rs b/src/display.rs index e495aee..9b72cb3 100644 --- a/src/display.rs +++ b/src/display.rs @@ -211,6 +211,11 @@ impl<'buf> fmt::Write for InstructionTextSink<'buf> { self.buf.write_str(s) } fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + 1 { + panic!("InstructionTextSink::write_char would overflow output"); + } + } // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()` // is valid for writing, but may be uninitialized. 
// @@ -640,6 +645,12 @@ impl DisplaySink for alloc::string::String { impl<'buf> DisplaySink for InstructionTextSink<'buf> { #[inline(always)] fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + s.len() { + panic!("InstructionTextSink::write_fixed_size would overflow output"); + } + } + let buf = unsafe { self.buf.as_mut_vec() }; let new_bytes = s.as_bytes(); @@ -671,6 +682,12 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { Ok(()) } unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + s.len() { + panic!("InstructionTextSink::write_lt_32 would overflow output"); + } + } + // SAFETY: todo let buf = unsafe { self.buf.as_mut_vec() }; let new_bytes = s.as_bytes(); @@ -745,6 +762,12 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { Ok(()) } unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + s.len() { + panic!("InstructionTextSink::write_lt_16 would overflow output"); + } + } + // SAFETY: todo let buf = unsafe { self.buf.as_mut_vec() }; let new_bytes = s.as_bytes(); @@ -810,6 +833,12 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { Ok(()) } unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + s.len() { + panic!("InstructionTextSink::write_lt_8 would overflow output"); + } + } + // SAFETY: todo let buf = unsafe { self.buf.as_mut_vec() }; let new_bytes = s.as_bytes(); @@ -881,6 +910,12 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { // means we can write directly into the correct offsets of the output string. 
let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + printed_size { + panic!("InstructionTextSink::write_u8 would overflow output"); + } + } + let buf = unsafe { self.buf.as_mut_vec() }; let new_len = buf.len() + printed_size; @@ -914,10 +949,17 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { if v == 0 { return self.write_fixed_size("0"); } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + printed_size { + panic!("InstructionTextSink::write_u16 would overflow output"); + } + } + let buf = unsafe { self.buf.as_mut_vec() }; let new_len = buf.len() + printed_size; @@ -951,10 +993,17 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { if v == 0 { return self.write_fixed_size("0"); } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + printed_size { + panic!("InstructionTextSink::write_u32 would overflow output"); + } + } + let buf = unsafe { self.buf.as_mut_vec() }; let new_len = buf.len() + printed_size; @@ -988,10 +1037,17 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> { if v == 0 { return self.write_fixed_size("0"); } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also // means we can write directly into the correct offsets of the output string. 
let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + printed_size { + panic!("InstructionTextSink::write_u64 would overflow output"); + } + } + let buf = unsafe { self.buf.as_mut_vec() }; let new_len = buf.len() + printed_size; diff --git a/src/lib.rs b/src/lib.rs index 709563b..491b6f0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -140,7 +140,7 @@ pub use real_mode::Arch as x86_16; mod safer_unchecked; #[cfg(feature = "fmt")] -mod display; +pub mod display; const MEM_SIZE_STRINGS: [&'static str; 65] = [ "BUG", diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index d9799e1..3615538 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -4303,6 +4303,7 @@ impl ShowContextual crate::long_mode::OperandVisitor for RelativeBranchPrin /// ``` /// use yaxpeax_x86::long_mode::InstDecoder; /// use yaxpeax_x86::long_mode::InstructionTextBuffer; +/// use yaxpeax_x86::long_mode::DisplayStyle; /// /// let bytes = &[0x33, 0xc0]; /// let inst = InstDecoder::default().decode_slice(bytes).expect("can decode"); /// let mut text_buf = InstructionTextBuffer::new(); /// assert_eq!( -/// text_buf.format_inst(&inst).expect("can format"), +/// text_buf.format_inst(&inst.display_with(DisplayStyle::Intel)).expect("can format"), /// "xor eax, eax" /// ); /// @@ -4511,7 +4513,9 @@ impl InstructionTextBuffer { /// this clears and reuses an internal buffer; if an instruction had been previously formatted /// through this buffer, it will be overwritten. pub fn format_inst<'buf, 'instr>(&'buf mut self, display: &InstructionDisplayer<'instr>) -> Result<&'buf str, fmt::Error> { - let mut handle = self.write_handle(); + // Safety: this sink is used to format exactly one instruction and then dropped. it can + // never escape `format_inst`. 
+ let mut handle = unsafe { self.write_handle() }; match display.style { DisplayStyle::Intel => { @@ -4531,8 +4535,15 @@ impl InstructionTextBuffer { self.content.as_str() } - fn write_handle(&mut self) -> crate::display::InstructionTextSink { + /// do the necessary bookkeeping and provide an `InstructionTextSink` to write an instruction + /// into. + /// + /// SAFETY: callers must print at most one instruction into this handle. + unsafe fn write_handle(&mut self) -> crate::display::InstructionTextSink { self.content.clear(); + // Safety: `content` was just cleared, so writing begins at the start of the buffer. + // `content`is large enough to hold a fully-formatted instruction (see + // `InstructionTextBuffer::new`). crate::display::InstructionTextSink::new(&mut self.content) } } diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 418d57f..9aeacdc 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -7,7 +7,7 @@ pub mod uarch; pub use crate::MemoryAccessSize; #[cfg(feature = "fmt")] -pub use self::display::{DisplayStyle, InstructionDisplayer}; +pub use self::display::{DisplayStyle, InstructionDisplayer, InstructionTextBuffer}; use core::cmp::PartialEq; use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index dcc9aad..7742496 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -79,7 +79,7 @@ fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str ); let mut text2 = String::new(); - let mut out = yaxpeax_x86::long_mode::NoColorsSink { + let mut out = yaxpeax_x86::display::NoColorsSink { out: &mut text2, }; instr.write_to(&mut out).expect("printing succeeds"); @@ -94,12 +94,12 @@ fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str text, ); - let mut formatter = yaxpeax_x86::long_mode::InstructionFormatter::new(); - let text3 = formatter.format_inst(&instr).expect("printing succeeds"); + let 
mut formatter = yaxpeax_x86::long_mode::InstructionTextBuffer::new(); + let text3 = formatter.format_inst(&instr.display_with(yaxpeax_x86::long_mode::DisplayStyle::Intel)).expect("printing succeeds"); assert!( text3 == text, - "display error through InstructionFormatter for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", + "display error through InstructionTextBuffer for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", hex, instr, decoder, From 347042c45ced56b37a665a2c4b042b7f7aae8e03 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 22 Jun 2024 00:25:01 -0700 Subject: [PATCH 59/95] extract reusable display bits into yaxpeax-arch, add a visitor fn to Operand comes with deleting the body of impl Colorize for Operand, because we can reuse the normal operand formatting code --- src/display.rs | 1076 -------------------------------------- src/lib.rs | 2 - src/long_mode/display.rs | 285 ++-------- src/long_mode/mod.rs | 46 ++ test/long_mode/mod.rs | 2 +- 5 files changed, 99 insertions(+), 1312 deletions(-) diff --git a/src/display.rs b/src/display.rs index 9b72cb3..e69de29 100644 --- a/src/display.rs +++ b/src/display.rs @@ -1,1076 +0,0 @@ -use core::fmt; - -use crate::safer_unchecked::unreachable_kinda_unchecked; - -extern crate alloc; - -// TODO: find a better place to put this.... -fn c_to_hex(c: u8) -> u8 { - /* - static CHARSET: &'static [u8; 16] = b"0123456789abcdef"; - CHARSET[c as usize] - */ - // the conditional branch below is faster than a lookup, yes - if c < 10 { - b'0' + c - } else { - b'a' + c - 10 - } -} - -pub enum TokenType { - Mnemonic, - Operand, - Immediate, - Register, - Offset, -} - -/// `DisplaySink` allows client code to collect output and minimal markup. this is currently used -/// in formatting instructions for two reasons: -/// * `DisplaySink` implementations have the opportunity to collect starts and ends of tokens at -/// the same time as collecting output itself. 
-/// * `DisplaySink` implementations provides specialized functions for writing strings in -/// circumstances where a simple "use `core::fmt`" might incur unwanted overhead. -/// -/// spans are reported through `span_start` and `span_exit` to avoid constraining implementations -/// into tracking current output offset (which may not be knowable) or span size (which may be -/// knowable, but incur additional overhead to compute or track). -/// -/// spans are entered and exited in a FILO manner: a function writing to some `DisplaySink` must -/// exit spans in reverse order to when they are entered. a function sequence like -/// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_exit(Operand)` is in error. -/// -/// the `write_*` helpers on `DisplaySink` may be able to take advantage of contraints described in -/// documentation here to better support writing some kinds of inputs than a fully-general solution -/// (such as `core::fmt`) might be able to yield. -/// -/// currently there are two motivating factors for `write_*` helpers: -/// -/// instruction formatting often involves writing small but variable-size strings, such as register -/// names, which is something of a pathological case for string appending as Rust currently exists: -/// this often becomes `memcpy` and specifically a call to the platform's `memcpy` (rather than an -/// inlined `rep movsb`) just to move 3-5 bytes. one relevant Rust issue for reference: -/// https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 -/// -/// there are similar papercuts around formatting integers as base-16 numbers, such as -/// https://github.com/rust-lang/rust/pull/122770 . in isolation and in most applications these are -/// not a significant source of overhead. but for programs bounded on decoding and printing -/// instructions, these can add up to significant overhead - on the order of 10-20% of total -/// runtime. 
-/// -/// `DisplaySink` -pub trait DisplaySink: fmt::Write { - #[inline(always)] - fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.write_str(s) - } - - /// write a string to this sink that is less than 32 bytes. this is provided for optimization - /// opportunities when writing a variable-length string with known max size. - /// - /// SAFETY: the provided `s` must be less than 32 bytes. if the provided string is longer than - /// 31 bytes, implementations may only copy part of a multi-byte codepoint while writing to a - /// utf-8 string. this may corrupt Rust strings. - unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.write_str(s) - } - /// write a string to this sink that is less than 16 bytes. this is provided for optimization - /// opportunities when writing a variable-length string with known max size. - /// - /// SAFETY: the provided `s` must be less than 16 bytes. if the provided string is longer than - /// 15 bytes, implementations may only copy part of a multi-byte codepoint while writing to a - /// utf-8 string. this may corrupt Rust strings. - unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.write_str(s) - } - /// write a string to this sink that is less than 8 bytes. this is provided for optimization - /// opportunities when writing a variable-length string with known max size. - /// - /// SAFETY: the provided `s` must be less than 8 bytes. if the provided string is longer than - /// 7 bytes, implementations may only copy part of a multi-byte codepoint while writing to a - /// utf-8 string. this may corrupt Rust strings. - unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.write_str(s) - } - - /// write a u8 to the output as a base-16 integer. 
- /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { - write!(self, "{:x}", v) - } - /// write a u16 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { - write!(self, "{:x}", v) - } - /// write a u32 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - fn write_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> { - write!(self, "{:x}", v) - } - /// write a u64 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - fn write_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> { - write!(self, "{:x}", v) - } - /// enter a region inside which output corresponds to a `ty`. - /// - /// the default implementation of these functions is as a no-op. this way, providing span - /// information to a `DisplaySink` that does not want it is eliminated at compile time. - /// - /// spans are entered and ended in a FILO manner: a function writing to some `DisplaySink` must - /// end spans in reverse order to when they are entered. a function sequence like - /// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_end(Operand)` is in error. 
- /// - /// a simple use of `span_start`/`span_end` might look something like: - /// ```compile_fail - /// sink.span_start(Operand) - /// sink.write_char('[') - /// sink.span_start(Register) - /// sink.write_fixed_size("rbp") - /// sink.span_end(Register) - /// sink.write_char(']') - /// sink.span_end(Operand) - /// ``` - /// which writes the text `[rbp]`, with span indicators where the operand (`[ ... ]`) begins, - /// as well as the start and end of a register name. - fn span_start(&mut self, _ty: TokenType) { } - /// end a region where a `ty` was written. see docs on [`DisplaySink::span_start`] for more. - fn span_end(&mut self, _ty: TokenType) { } -} - -pub struct NoColorsSink<'a, T: fmt::Write> { - pub out: &'a mut T, -} - -impl<'a, T: fmt::Write> DisplaySink for NoColorsSink<'a, T> { - fn span_start(&mut self, _ty: TokenType) { } - fn span_end(&mut self, _ty: TokenType) { } -} - -impl<'a, T: fmt::Write> fmt::Write for NoColorsSink<'a, T> { - fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.out.write_str(s) - } - fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { - self.out.write_char(c) - } - fn write_fmt(&mut self, f: fmt::Arguments) -> Result<(), core::fmt::Error> { - self.out.write_fmt(f) - } -} - -/// this is an implementation detail of yaxpeax-arch and related crates. if you are a user of the -/// disassemblers, do not use this struct. do not depend on this struct existing. this struct is -/// not stable. this struct is not safe for general use. if you use this struct you and your -/// program will be eaten by gremlins. -/// -/// if you are implementing an instruction formatter for the yaxpeax family of crates: this struct -/// is guaranteed to contain a string that is long enough to hold a fully-formatted instruction. -/// because the buffer is guaranteed to be long enough, writes through `InstructionTextSink` are -/// not bounds-checked, and the buffer is never grown. 
-/// -/// this is wildly dangerous in general use. the public constructor of `InstructionTextSink` is -/// unsafe as a result. as used in `InstructionFormatter`, the buffer is guaranteed to be -/// `clear()`ed before use, `InstructionFormatter` ensures the buffer is large enough, *and* -/// `InstructionFormatter` never allows `InstructionTextSink` to exist in a context where it would -/// be written to without being rewound first. -/// -/// because this opens a very large hole through which `fmt::Write` can become unsafe, incorrect -/// uses of this struct will be hard to debug in general. `InstructionFormatter` is probably at the -/// limit of easily-reasoned-about lifecycle of the buffer, which "only" leaves the problem of -/// ensuring that instruction formatting impls this buffer is passed to are appropriately sized. -/// -/// this is intended to be hidden in docs. if you see this in docs, it's a bug. -#[doc(hidden)] -pub(crate) struct InstructionTextSink<'buf> { - buf: &'buf mut alloc::string::String -} - -impl<'buf> InstructionTextSink<'buf> { - pub unsafe fn new(buf: &'buf mut alloc::string::String) -> Self { - Self { buf } - } -} - -impl<'buf> fmt::Write for InstructionTextSink<'buf> { - fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.buf.write_str(s) - } - fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { - if cfg!(debug_assertions) { - if self.buf.capacity() < self.buf.len() + 1 { - panic!("InstructionTextSink::write_char would overflow output"); - } - } - // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()` - // is valid for writing, but may be uninitialized. 
- // - // this function is essentially equivalent to `Vec::push` specialized for the case that - // `len < buf.capacity()`: - // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006 - unsafe { - let underlying = self.buf.as_mut_vec(); - // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to - // write single ASCII characters. this is wrong in the general case, but `write_char` - // here is not going to be used in the general case. - if cfg!(debug_asertions) { - panic!("InstructionTextSink::write_char would truncate output"); - } - let to_push = c as u8; - // `ptr::write` here because `underlying.add(underlying.len())` may not point to an - // initialized value, which would mean that turning that pointer into a `&mut u8` to - // store through would be UB. `ptr::write` avoids taking the mut ref. - underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push); - // we have initialized all (one) bytes that `set_len` is increasing the length to - // include. - underlying.set_len(underlying.len() + 1); - } - Ok(()) - } -} - -/// this DisplaySink impl exists to support somewhat more performant buffering of the kinds of -/// strings `yaxpeax-x86` uses in formatting instructions. -impl DisplaySink for alloc::string::String { - #[inline(always)] - fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { - self.reserve(s.len()); - let buf = unsafe { self.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - if new_bytes.len() == 0 { - unsafe { unreachable_kinda_unchecked() } - } - - if new_bytes.len() >= 16 { - unsafe { unreachable_kinda_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - - // this used to be enough to bamboozle llvm away from - // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 - // if `s` is not fixed size. 
somewhere between Rust 1.68 and Rust 1.74 this stopped - // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` - // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this - // unrolls into some kind of appropriate series of `mov`. - dest.offset(0 as isize).write(new_bytes[0]); - for i in 1..new_bytes.len() { - dest.offset(i as isize).write(new_bytes[i]); - } - - buf.set_len(buf.len() + new_bytes.len()); - } - - Ok(()) - } - unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { - self.reserve(s.len()); - - // SAFETY: todo - let buf = unsafe { self.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - // should get DCE - if new_bytes.len() >= 32 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let rem = new_bytes.len() as isize; - - // set_len early because there is no way to avoid the following asm!() writing that - // same number of bytes into buf - buf.set_len(buf.len() + new_bytes.len()); - - core::arch::asm!( - "6:", - "cmp {rem:e}, 16", - "jb 7f", - "mov {buf:r}, qword ptr [{src} + {rem} - 16]", - "mov qword ptr [{dest} + {rem} - 16], {buf:r}", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 16", - "jz 11f", - "7:", - "cmp {rem:e}, 8", - "jb 8f", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 8", - "jz 11f", - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + 
{rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, - buf = out(reg) _, - options(nostack), - ); - } - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - - Ok(()) - } - unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { - self.reserve(s.len()); - - // SAFETY: todo - let buf = unsafe { self.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - // should get DCE - if new_bytes.len() >= 16 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let rem = new_bytes.len() as isize; - - // set_len early because there is no way to avoid the following asm!() writing that - // same number of bytes into buf - buf.set_len(buf.len() + new_bytes.len()); - - core::arch::asm!( - "7:", - "cmp {rem:e}, 8", - "jb 8f", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 8", - "jz 11f", - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + {rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, - buf = out(reg) _, - options(nostack), - ); - } - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - - Ok(()) - } - unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { - self.reserve(s.len()); - - // 
SAFETY: todo - let buf = unsafe { self.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - // should get DCE - if new_bytes.len() >= 8 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let rem = new_bytes.len() as isize; - - // set_len early because there is no way to avoid the following asm!() writing that - // same number of bytes into buf - buf.set_len(buf.len() + new_bytes.len()); - - core::arch::asm!( - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + {rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, - buf = out(reg) _, - options(nostack), - ); - } - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - - Ok(()) - } - /// write a u8 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. 
- let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; - - self.reserve(printed_size); - - let buf = unsafe { self.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - /// write a u16 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. - let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; - - self.reserve(printed_size); - - let buf = unsafe { self.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - /// write a u32 to the output as a base-16 integer. 
- /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. - let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; - - self.reserve(printed_size); - - let buf = unsafe { self.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - /// write a u64 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. 
- let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; - - self.reserve(printed_size); - - let buf = unsafe { self.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - fn span_start(&mut self, _ty: TokenType) {} - fn span_end(&mut self, _ty: TokenType) {} -} - -impl<'buf> DisplaySink for InstructionTextSink<'buf> { - #[inline(always)] - fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { - if cfg!(debug_assertions) { - if self.buf.capacity() < self.buf.len() + s.len() { - panic!("InstructionTextSink::write_fixed_size would overflow output"); - } - } - - let buf = unsafe { self.buf.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - if new_bytes.len() == 0 { - return Ok(()); - } - - if new_bytes.len() >= 16 { - unsafe { unreachable_kinda_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - - // this used to be enough to bamboozle llvm away from - // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 - // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped - // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` - // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this - // unrolls into some kind of appropriate series of `mov`. 
- dest.offset(0 as isize).write(new_bytes[0]); - for i in 1..new_bytes.len() { - dest.offset(i as isize).write(new_bytes[i]); - } - - buf.set_len(buf.len() + new_bytes.len()); - } - - Ok(()) - } - unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { - if cfg!(debug_assertions) { - if self.buf.capacity() < self.buf.len() + s.len() { - panic!("InstructionTextSink::write_lt_32 would overflow output"); - } - } - - // SAFETY: todo - let buf = unsafe { self.buf.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - // should get DCE - if new_bytes.len() >= 32 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let rem = new_bytes.len() as isize; - - // set_len early because there is no way to avoid the following asm!() writing that - // same number of bytes into buf - buf.set_len(buf.len() + new_bytes.len()); - - core::arch::asm!( - "6:", - "cmp {rem:e}, 16", - "jb 7f", - "mov {buf:r}, qword ptr [{src} + {rem} - 16]", - "mov qword ptr [{dest} + {rem} - 16], {buf:r}", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 16", - "jz 11f", - "7:", - "cmp {rem:e}, 8", - "jb 8f", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 8", - "jz 11f", - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + {rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, - buf = out(reg) _, - options(nostack), - ); - } 
- /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - - Ok(()) - } - unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { - if cfg!(debug_assertions) { - if self.buf.capacity() < self.buf.len() + s.len() { - panic!("InstructionTextSink::write_lt_16 would overflow output"); - } - } - - // SAFETY: todo - let buf = unsafe { self.buf.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - // should get DCE - if new_bytes.len() >= 16 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let rem = new_bytes.len() as isize; - - // set_len early because there is no way to avoid the following asm!() writing that - // same number of bytes into buf - buf.set_len(buf.len() + new_bytes.len()); - - core::arch::asm!( - "7:", - "cmp {rem:e}, 8", - "jb 8f", - "mov {buf:r}, qword ptr [{src} + {rem} - 8]", - "mov qword ptr [{dest} + {rem} - 8], {buf:r}", - "sub {rem:e}, 8", - "jz 11f", - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + {rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, - buf = out(reg) _, - options(nostack), - ); - } - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - - Ok(()) - } - unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { - if cfg!(debug_assertions) { - if 
self.buf.capacity() < self.buf.len() + s.len() { - panic!("InstructionTextSink::write_lt_8 would overflow output"); - } - } - - // SAFETY: todo - let buf = unsafe { self.buf.as_mut_vec() }; - let new_bytes = s.as_bytes(); - - // should get DCE - if new_bytes.len() >= 8 { - unsafe { core::hint::unreachable_unchecked() } - } - - unsafe { - let dest = buf.as_mut_ptr().offset(buf.len() as isize); - let src = new_bytes.as_ptr(); - - let rem = new_bytes.len() as isize; - - // set_len early because there is no way to avoid the following asm!() writing that - // same number of bytes into buf - buf.set_len(buf.len() + new_bytes.len()); - - core::arch::asm!( - "8:", - "cmp {rem:e}, 4", - "jb 9f", - "mov {buf:e}, dword ptr [{src} + {rem} - 4]", - "mov dword ptr [{dest} + {rem} - 4], {buf:e}", - "sub {rem:e}, 4", - "jz 11f", - "9:", - "cmp {rem:e}, 2", - "jb 10f", - "mov {buf:x}, word ptr [{src} + {rem} - 2]", - "mov word ptr [{dest} + {rem} - 2], {buf:x}", - "sub {rem:e}, 2", - "jz 11f", - "10:", - "cmp {rem:e}, 1", - "jb 11f", - "mov {buf:l}, byte ptr [{src} + {rem} - 1]", - "mov byte ptr [{dest} + {rem} - 1], {buf:l}", - "11:", - src = in(reg) src, - dest = in(reg) dest, - rem = inout(reg) rem => _, - buf = out(reg) _, - options(nostack), - ); - } - /* - for i in 0..new_bytes.len() { - unsafe { - buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); - } - } - */ - - Ok(()) - } - /// write a u8 to the output as a base-16 integer. 
- /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. - let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; - - if cfg!(debug_assertions) { - if self.buf.capacity() < self.buf.len() + printed_size { - panic!("InstructionTextSink::write_u8 would overflow output"); - } - } - - let buf = unsafe { self.buf.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - /// write a u16 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. 
- let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; - - if cfg!(debug_assertions) { - if self.buf.capacity() < self.buf.len() + printed_size { - panic!("InstructionTextSink::write_u16 would overflow output"); - } - } - - let buf = unsafe { self.buf.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - /// write a u32 to the output as a base-16 integer. - /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. - let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; - - if cfg!(debug_assertions) { - if self.buf.capacity() < self.buf.len() + printed_size { - panic!("InstructionTextSink::write_u32 would overflow output"); - } - } - - let buf = unsafe { self.buf.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - /// write a u64 to the output as a base-16 integer. 
- /// - /// this is provided for optimization opportunities when the formatted integer can be written - /// directly to the sink (rather than formatted to an intermediate buffer and output as a - /// followup step) - #[inline(always)] - fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { - if v == 0 { - return self.write_fixed_size("0"); - } - - // we can fairly easily predict the size of a formatted string here with lzcnt, which also - // means we can write directly into the correct offsets of the output string. - let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; - - if cfg!(debug_assertions) { - if self.buf.capacity() < self.buf.len() + printed_size { - panic!("InstructionTextSink::write_u64 would overflow output"); - } - } - - let buf = unsafe { self.buf.as_mut_vec() }; - let new_len = buf.len() + printed_size; - - unsafe { - buf.set_len(new_len); - } - let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - - loop { - let digit = v % 16; - let c = c_to_hex(digit as u8); - unsafe { - p = p.offset(-1); - p.write(c); - } - v = v / 16; - if v == 0 { - break; - } - } - - Ok(()) - } - fn span_start(&mut self, _ty: TokenType) {} - fn span_end(&mut self, _ty: TokenType) {} -} diff --git a/src/lib.rs b/src/lib.rs index 491b6f0..a7b8531 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -139,8 +139,6 @@ pub mod real_mode; pub use real_mode::Arch as x86_16; mod safer_unchecked; -#[cfg(feature = "fmt")] -pub mod display; const MEM_SIZE_STRINGS: [&'static str; 65] = [ "BUG", diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 3615538..18314ab 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1,14 +1,12 @@ use core::fmt; use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; -use yaxpeax_arch::display::*; use crate::safer_unchecked::GetSaferUnchecked as _; use crate::MEM_SIZE_STRINGS; use crate::long_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, 
PrefixRex, OperandSpec}; -use crate::display::DisplaySink; -use crate::display::TokenType; +use yaxpeax_arch::display::DisplaySink; impl fmt::Display for InstDecoder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -166,202 +164,13 @@ impl fmt::Display for Operand { impl Colorize for Operand { fn colorize(&self, colors: &Y, f: &mut T) -> fmt::Result { - match self { - &Operand::ImmediateU8(imm) => { - write!(f, "{}", colors.number(u8_hex(imm))) - } - &Operand::ImmediateI8(imm) => { - write!(f, "{}", - colors.number(signed_i8_hex(imm))) - }, - &Operand::ImmediateU16(imm) => { - write!(f, "{}", colors.number(u16_hex(imm))) - } - &Operand::ImmediateI16(imm) => { - write!(f, "{}", - colors.number(signed_i16_hex(imm))) - }, - &Operand::ImmediateU32(imm) => { - write!(f, "{}", colors.number(u32_hex(imm))) - } - &Operand::ImmediateI32(imm) => { - write!(f, "{}", - colors.number(signed_i32_hex(imm))) - }, - &Operand::ImmediateU64(imm) => { - write!(f, "{}", colors.number(u64_hex(imm))) - } - &Operand::ImmediateI64(imm) => { - write!(f, "{}", - colors.number(signed_i64_hex(imm))) - }, - &Operand::Register(ref spec) => { - f.write_str(regspec_label(spec)) - } - &Operand::RegisterMaskMerge(ref spec, ref mask, merge_mode) => { - f.write_str(regspec_label(spec))?; - if mask.num != 0 { - f.write_str("{")?; - f.write_str(regspec_label(mask))?; - f.write_str("}")?; - } - if let MergeMode::Zero = merge_mode { - f.write_str("{z}")?; - } - Ok(()) - } - &Operand::RegisterMaskMergeSae(ref spec, ref mask, merge_mode, sae_mode) => { - f.write_str(regspec_label(spec))?; - if mask.num != 0 { - f.write_str("{")?; - f.write_str(regspec_label(mask))?; - f.write_str("}")?; - } - if let MergeMode::Zero = merge_mode { - f.write_str("{z}")?; - } - f.write_str(sae_mode.label())?; - Ok(()) - } - &Operand::RegisterMaskMergeSaeNoround(ref spec, ref mask, merge_mode) => { - f.write_str(regspec_label(spec))?; - if mask.num != 0 { - f.write_str("{")?; - f.write_str(regspec_label(mask))?; - 
f.write_str("}")?; - } - if let MergeMode::Zero = merge_mode { - f.write_str("{z}")?; - } - f.write_str("{sae}")?; - Ok(()) - } - &Operand::DisplacementU32(imm) => { - write!(f, "[{}]", colors.address(u32_hex(imm))) - } - &Operand::DisplacementU64(imm) => { - write!(f, "[{}]", colors.address(u64_hex(imm))) - } - &Operand::RegDisp(ref spec, disp) => { - write!(f, "[{} ", regspec_label(spec))?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]") - }, - &Operand::RegDeref(ref spec) => { - f.write_str("[")?; - f.write_str(regspec_label(spec))?; - f.write_str("]") - }, - &Operand::RegScale(ref spec, scale) => { - write!(f, "[{} * {}]", - regspec_label(spec), - colors.number(scale) - ) - }, - &Operand::RegScaleDisp(ref spec, scale, disp) => { - write!(f, "[{} * {} ", - regspec_label(spec), - colors.number(scale), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]") - }, - &Operand::RegIndexBase(ref base, ref index) => { - f.write_str("[")?; - f.write_str(regspec_label(base))?; - f.write_str(" + ")?; - f.write_str(regspec_label(index))?; - f.write_str("]") - } - &Operand::RegIndexBaseDisp(ref base, ref index, disp) => { - write!(f, "[{} + {} ", - regspec_label(base), - regspec_label(index), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]") - }, - &Operand::RegIndexBaseScale(ref base, ref index, scale) => { - write!(f, "[{} + {} * {}]", - regspec_label(base), - regspec_label(index), - colors.number(scale) - ) - } - &Operand::RegIndexBaseScaleDisp(ref base, ref index, scale, disp) => { - write!(f, "[{} + {} * {} ", - regspec_label(base), - regspec_label(index), - colors.number(scale), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]") - }, - &Operand::RegDispMasked(ref spec, disp, ref mask_reg) => { - write!(f, "[{} ", regspec_label(spec))?; - format_number_i32(colors, f, disp, 
NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegDerefMasked(ref spec, ref mask_reg) => { - f.write_str("[")?; - f.write_str(regspec_label(spec))?; - f.write_str("]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegScaleMasked(ref spec, scale, ref mask_reg) => { - write!(f, "[{} * {}]", - regspec_label(spec), - colors.number(scale) - )?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegScaleDispMasked(ref spec, scale, disp, ref mask_reg) => { - write!(f, "[{} * {} ", - regspec_label(spec), - colors.number(scale), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegIndexBaseMasked(ref base, ref index, ref mask_reg) => { - f.write_str("[")?; - f.write_str(regspec_label(base))?; - f.write_str(" + ")?; - f.write_str(regspec_label(index))?; - f.write_str("]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - } - &Operand::RegIndexBaseDispMasked(ref base, ref index, disp, ref mask_reg) => { - write!(f, "[{} + {} ", - regspec_label(base), - regspec_label(index), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegIndexBaseScaleMasked(ref base, ref index, scale, ref mask_reg) => { - write!(f, "[{} + {} * {}]", - regspec_label(base), - regspec_label(index), - colors.number(scale) - )?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - } - &Operand::RegIndexBaseScaleDispMasked(ref base, ref index, scale, disp, ref mask_reg) => { - write!(f, "[{} + {} * {} ", - regspec_label(base), - regspec_label(index), - colors.number(scale), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::Nothing => { Ok(()) }, - } + 
let mut f = yaxpeax_arch::display::NoColorsSink { + out: f + }; + let mut visitor = ColorizingOperandVisitor { + f: &mut f + }; + self.visit(&mut visitor) } } @@ -375,15 +184,15 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi #[cfg_attr(feature="profiling", inline(never))] fn visit_u8(&mut self, imm: u8) -> Result { - self.f.span_start(TokenType::Immediate); + self.f.span_start_immediate(); self.f.write_fixed_size("0x")?; self.f.write_u8(imm)?; - self.f.span_end(TokenType::Immediate); + self.f.span_end_immediate(); Ok(()) } #[cfg_attr(feature="profiling", inline(never))] fn visit_i8(&mut self, imm: i8) -> Result { - self.f.span_start(TokenType::Immediate); + self.f.span_start_immediate(); let mut v = imm as u8; if imm < 0 { self.f.write_char('-')?; @@ -391,20 +200,20 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } self.f.write_fixed_size("0x")?; self.f.write_u8(v)?; - self.f.span_end(TokenType::Immediate); + self.f.span_end_immediate(); Ok(()) } #[cfg_attr(feature="profiling", inline(never))] fn visit_u16(&mut self, imm: u16) -> Result { - self.f.span_start(TokenType::Immediate); + self.f.span_start_immediate(); self.f.write_fixed_size("0x")?; self.f.write_u16(imm)?; - self.f.span_end(TokenType::Immediate); + self.f.span_end_immediate(); Ok(()) } #[cfg_attr(feature="profiling", inline(never))] fn visit_i16(&mut self, imm: i16) -> Result { - self.f.span_start(TokenType::Immediate); + self.f.span_start_immediate(); let mut v = imm as u16; if imm < 0 { self.f.write_char('-')?; @@ -412,19 +221,19 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } self.f.write_fixed_size("0x")?; self.f.write_u16(v)?; - self.f.span_end(TokenType::Immediate); + self.f.span_end_immediate(); Ok(()) } #[cfg_attr(feature="profiling", inline(never))] fn visit_u32(&mut self, imm: u32) -> Result { - self.f.span_start(TokenType::Immediate); + self.f.span_start_immediate(); self.f.write_fixed_size("0x")?; self.f.write_u32(imm)?; - 
self.f.span_end(TokenType::Immediate); + self.f.span_end_immediate(); Ok(()) } fn visit_i32(&mut self, imm: i32) -> Result { - self.f.span_start(TokenType::Immediate); + self.f.span_start_immediate(); let mut v = imm as u32; if imm < 0 { self.f.write_char('-')?; @@ -432,20 +241,20 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } self.f.write_fixed_size("0x")?; self.f.write_u32(v)?; - self.f.span_end(TokenType::Immediate); + self.f.span_end_immediate(); Ok(()) } #[cfg_attr(feature="profiling", inline(never))] fn visit_u64(&mut self, imm: u64) -> Result { - self.f.span_start(TokenType::Immediate); + self.f.span_start_immediate(); self.f.write_fixed_size("0x")?; self.f.write_u64(imm)?; - self.f.span_end(TokenType::Immediate); + self.f.span_end_immediate(); Ok(()) } #[cfg_attr(feature="profiling", inline(never))] fn visit_i64(&mut self, imm: i64) -> Result { - self.f.span_start(TokenType::Immediate); + self.f.span_start_immediate(); let mut v = imm as u64; if imm < 0 { self.f.write_char('-')?; @@ -453,25 +262,25 @@ impl crate::long_mode::OperandVisitor for ColorizingOperandVisi } self.f.write_fixed_size("0x")?; self.f.write_u64(v)?; - self.f.span_end(TokenType::Immediate); + self.f.span_end_immediate(); Ok(()) } #[cfg_attr(feature="profiling", inline(never))] fn visit_reg(&mut self, reg: RegSpec) -> Result { - self.f.span_start(TokenType::Register); + self.f.span_start_register(); unsafe { self.f.write_lt_8(regspec_label(®))?; } - self.f.span_end(TokenType::Register); + self.f.span_end_register(); Ok(()) } fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { - self.f.span_start(TokenType::Register); + self.f.span_start_register(); unsafe { self.f.write_lt_8(regspec_label(&spec))?; } - self.f.span_end(TokenType::Register); + self.f.span_end_register(); if mask.num != 0 { self.f.write_fixed_size("{")?; - self.f.span_start(TokenType::Register); + self.f.span_start_register(); unsafe { 
self.f.write_lt_8(regspec_label(&mask))?; } - self.f.span_end(TokenType::Register); + self.f.span_end_register(); self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { @@ -3923,20 +3732,30 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) } fn write_jmp_operand(op: Operand, out: &mut T) -> fmt::Result { + let mut out = yaxpeax_arch::display::NoColorsSink { + out, + }; + use core::fmt::Write; match op { Operand::ImmediateI8(rel) => { - if rel >= 0 { - write!(out, "$+{}", (signed_i32_hex(rel as i32))) + let rel = if rel >= 0 { + out.write_str("$+")?; + rel as u8 } else { - write!(out, "${}", (signed_i32_hex(rel as i32))) - } + out.write_str("$-")?; + rel.unsigned_abs() + }; + out.write_prefixed_u8(rel) } Operand::ImmediateI32(rel) => { - if rel >= 0 { - write!(out, "$+{}", (signed_i32_hex(rel))) + let rel = if rel >= 0 { + out.write_str("$+")?; + rel as u32 } else { - write!(out, "${}", (signed_i32_hex(rel))) - } + out.write_str("$-")?; + rel.unsigned_abs() + }; + out.write_prefixed_u32(rel) } other => { write!(out, "{}", other) @@ -4217,7 +4036,7 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual style, } = self; - let mut out = crate::display::NoColorsSink { + let mut out = yaxpeax_arch::display::NoColorsSink { out: out, }; @@ -4235,7 +4054,7 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual #[cfg(feature="std")] impl ShowContextual], T, Y> for Instruction { fn contextualize(&self, colors: &Y, _address: u64, context: Option<&[Option]>, out: &mut T) -> fmt::Result { - let mut out = crate::display::NoColorsSink { + let mut out = yaxpeax_arch::display::NoColorsSink { out, }; let out = &mut out; @@ -4539,11 +4358,11 @@ impl InstructionTextBuffer { /// into. /// /// SAFETY: callers must print at most one instruction into this handle. 
- unsafe fn write_handle(&mut self) -> crate::display::InstructionTextSink { + unsafe fn write_handle(&mut self) -> yaxpeax_arch::display::InstructionTextSink { self.content.clear(); // Safety: `content` was just cleared, so writing begins at the start of the buffer. // `content`is large enough to hold a fully-formatted instruction (see // `InstructionTextBuffer::new`). - crate::display::InstructionTextSink::new(&mut self.content) + yaxpeax_arch::display::InstructionTextSink::new(&mut self.content) } } diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 9aeacdc..d66f59a 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -802,6 +802,52 @@ impl Operand { } } } + + /// provided for parity with [`Instruction::visit_operand`]. this has little utility other than + /// to reuse an `OperandVisitor` on an `Operand` directly. + pub fn visit(&self, visitor: &mut T) -> Result { + match self { + Operand::Nothing => { + visitor.visit_other() + } + Operand::Register(reg) => { + visitor.visit_reg(*reg) + } + Operand::RegDeref(reg) => { + visitor.visit_deref(*reg) + } + Operand::RegDisp(reg, disp) => { + visitor.visit_disp(*reg, *disp) + } + Operand::ImmediateI8(imm) => visitor.visit_i8(*imm), + Operand::ImmediateU8(imm) => visitor.visit_u8(*imm), + Operand::ImmediateI16(imm) => visitor.visit_i16(*imm), + Operand::ImmediateU16(imm) => visitor.visit_u16(*imm), + Operand::ImmediateI32(imm) => visitor.visit_i32(*imm), + Operand::ImmediateU32(imm) => visitor.visit_u32(*imm), + Operand::ImmediateI64(imm) => visitor.visit_i64(*imm), + Operand::ImmediateU64(imm) => visitor.visit_u64(*imm), + Operand::DisplacementU32(disp) => visitor.visit_abs_u32(*disp), + Operand::DisplacementU64(disp) => visitor.visit_abs_u64(*disp), + Operand::RegScale(reg, scale) => visitor.visit_reg_scale(*reg, *scale), + Operand::RegScaleDisp(reg, scale, disp) => visitor.visit_reg_scale_disp(*reg, *scale, *disp), + Operand::RegIndexBase(_, _) => { /* not actually reachable anymore */ 
visitor.visit_other() }, + Operand::RegIndexBaseDisp(_, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, + Operand::RegIndexBaseScale(base, index, scale) => visitor.visit_index_base_scale(*base, *index, *scale), + Operand::RegIndexBaseScaleDisp(base, index, scale, disp) => visitor.visit_index_base_scale_disp(*base, *index, *scale, *disp), + Operand::RegisterMaskMerge(reg, mask, merge) => visitor.visit_reg_mask_merge(*reg, *mask, *merge), + Operand::RegisterMaskMergeSae(reg, mask, merge, sae) => visitor.visit_reg_mask_merge_sae(*reg, *mask, *merge, *sae), + Operand::RegisterMaskMergeSaeNoround(reg, mask, merge) => visitor.visit_reg_mask_merge_sae_noround(*reg, *mask, *merge), + Operand::RegDerefMasked(reg, mask) => visitor.visit_reg_deref_masked(*reg, *mask), + Operand::RegDispMasked(reg, disp, mask) => visitor.visit_reg_disp_masked(*reg, *disp, *mask), + Operand::RegScaleMasked(reg, scale, mask) => visitor.visit_reg_scale_masked(*reg, *scale, *mask), + Operand::RegIndexBaseMasked(_, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, + Operand::RegIndexBaseDispMasked(_, _, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, + Operand::RegScaleDispMasked(base, scale, disp, mask) => visitor.visit_reg_scale_disp_masked(*base, *scale, *disp, *mask), + Operand::RegIndexBaseScaleMasked(base, index, scale, mask) => visitor.visit_index_base_scale_masked(*base, *index, *scale, *mask), + Operand::RegIndexBaseScaleDispMasked(base, index, scale, disp, mask) => visitor.visit_index_base_scale_disp_masked(*base, *index, *scale, *disp, *mask), + } + } } #[test] diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index 7742496..96c7c79 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -79,7 +79,7 @@ fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str ); let mut text2 = String::new(); - let mut out = yaxpeax_x86::display::NoColorsSink { + let mut out = 
yaxpeax_arch::display::NoColorsSink { out: &mut text2, }; instr.write_to(&mut out).expect("printing succeeds"); From bebdead98a9fa6510e30dbde6cc694f52a346a1e Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 22 Jun 2024 15:18:31 -0700 Subject: [PATCH 60/95] NoColorsSink has a decent name now --- src/long_mode/display.rs | 43 ++++++++++++++++++++-------------------- test/long_mode/mod.rs | 6 ++---- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 18314ab..e8000ed 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -163,22 +163,20 @@ impl fmt::Display for Operand { } impl Colorize for Operand { - fn colorize(&self, colors: &Y, f: &mut T) -> fmt::Result { - let mut f = yaxpeax_arch::display::NoColorsSink { - out: f - }; - let mut visitor = ColorizingOperandVisitor { + fn colorize(&self, _colors: &Y, f: &mut T) -> fmt::Result { + let mut f = yaxpeax_arch::display::FmtSink::new(f); + let mut visitor = DisplayingOperandVisitor { f: &mut f }; self.visit(&mut visitor) } } -struct ColorizingOperandVisitor<'a, T> { +struct DisplayingOperandVisitor<'a, T> { f: &'a mut T, } -impl crate::long_mode::OperandVisitor for ColorizingOperandVisitor<'_, T> { +impl crate::long_mode::OperandVisitor for DisplayingOperandVisitor<'_, T> { type Ok = (); type Error = core::fmt::Error; @@ -3536,8 +3534,17 @@ impl <'instr, T: fmt::Write, Y: YaxColors> Colorize for InstructionDisplay struct NoContext; impl Instruction { + /// format this instruction into `out` as a plain text string. #[cfg_attr(feature="profiling", inline(never))] - pub fn write_to(&self, out: &mut T) -> fmt::Result { + pub fn write_to(&self, out: &mut T) -> fmt::Result { + let mut out = yaxpeax_arch::display::FmtSink::new(out); + contextualize_intel(self, &mut out) + } + + /// format this instruction into `out`, which may perform additional styling based on its + /// `DisplaySink` implementation. 
+ #[cfg_attr(feature="profiling", inline(never))] + pub fn display_into(&self, out: &mut T) -> fmt::Result { contextualize_intel(self, out) } } @@ -3590,7 +3597,7 @@ pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut } } - let mut displayer = ColorizingOperandVisitor { + let mut displayer = DisplayingOperandVisitor { f: out, }; instr.visit_operand(0 as u8, &mut displayer)?; @@ -3619,7 +3626,7 @@ pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut } } - let mut displayer = ColorizingOperandVisitor { + let mut displayer = DisplayingOperandVisitor { f: out, }; @@ -3732,9 +3739,7 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) } fn write_jmp_operand(op: Operand, out: &mut T) -> fmt::Result { - let mut out = yaxpeax_arch::display::NoColorsSink { - out, - }; + let mut out = yaxpeax_arch::display::FmtSink::new(out); use core::fmt::Write; match op { Operand::ImmediateI8(rel) => { @@ -4036,9 +4041,7 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual style, } = self; - let mut out = yaxpeax_arch::display::NoColorsSink { - out: out, - }; + let mut out = yaxpeax_arch::display::FmtSink::new(out); match style { DisplayStyle::Intel => { @@ -4054,9 +4057,7 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual #[cfg(feature="std")] impl ShowContextual], T, Y> for Instruction { fn contextualize(&self, colors: &Y, _address: u64, context: Option<&[Option]>, out: &mut T) -> fmt::Result { - let mut out = yaxpeax_arch::display::NoColorsSink { - out, - }; + let mut out = yaxpeax_arch::display::FmtSink::new(out); let out = &mut out; use core::fmt::Write; @@ -4092,7 +4093,7 @@ impl ShowContextual ShowContextual { write!(out, ", ")?; - let mut displayer = ColorizingOperandVisitor { + let mut displayer = DisplayingOperandVisitor { f: out, }; self.visit_operand(i as u8, &mut displayer)?; diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index 96c7c79..6c666e1 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs 
@@ -79,14 +79,12 @@ fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str ); let mut text2 = String::new(); - let mut out = yaxpeax_arch::display::NoColorsSink { - out: &mut text2, - }; + let mut out = yaxpeax_arch::display::FmtSink::new(&mut text2); instr.write_to(&mut out).expect("printing succeeds"); assert!( text2 == text, - "display error through NoColorsSink for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", + "display error through FmtSink for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", hex, instr, decoder, From 4c1f3c84bdba53c514713fbaaf00431efd60d21c Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 10:11:57 -0700 Subject: [PATCH 61/95] add more conditional inlining for 32-bit and 16-bit decoders --- src/protected_mode/mod.rs | 8 +++++++- src/real_mode/mod.rs | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 0d125eb..cf72fda 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -5706,7 +5706,8 @@ const OPCODES: [OpcodeRecord; 256] = [ ]; #[allow(non_snake_case)] -#[inline(always)] +#[cfg_attr(feature="profiling", inline(never))] +#[cfg_attr(not(feature="profiling"), inline(never))] pub(self) fn read_E< T: Reader<::Address, ::Word>, S: DescriptionSink, @@ -6444,6 +6445,7 @@ impl DecodeCtx { } } +#[cfg_attr(feature="profiling", inline(never))] fn read_opc_hotpath< T: Reader<::Address, ::Word>, S: DescriptionSink, @@ -6494,6 +6496,8 @@ fn read_opc_hotpath< } } +#[cfg_attr(feature="profiling", inline(never))] +#[cfg_attr(not(feature="profiling"), inline(never))] fn read_with_annotations< T: Reader<::Address, ::Word>, S: DescriptionSink, @@ -6654,6 +6658,8 @@ fn read_with_annotations< Ok(()) } +#[cfg_attr(feature="profiling", inline(never))] +#[cfg_attr(not(feature="profiling"), inline(never))] fn read_operands< T: Reader<::Address, ::Word>, S: DescriptionSink diff 
--git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index 68ab6dd..b73f38a 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -5706,7 +5706,8 @@ const OPCODES: [OpcodeRecord; 256] = [ ]; #[allow(non_snake_case)] -#[inline(always)] +#[cfg_attr(feature="profiling", inline(never))] +#[cfg_attr(not(feature="profiling"), inline(always))] pub(self) fn read_E< T: Reader<::Address, ::Word>, S: DescriptionSink, @@ -6445,6 +6446,7 @@ impl DecodeCtx { } } +#[cfg_attr(feature="profiling", inline(never))] fn read_opc_hotpath< T: Reader<::Address, ::Word>, S: DescriptionSink, @@ -6495,6 +6497,8 @@ fn read_opc_hotpath< } } +#[cfg_attr(feature="profiling", inline(never))] +#[cfg_attr(not(feature="profiling"), inline(always))] fn read_with_annotations< T: Reader<::Address, ::Word>, S: DescriptionSink, @@ -6655,6 +6659,8 @@ fn read_with_annotations< Ok(()) } +#[cfg_attr(feature="profiling", inline(never))] +#[cfg_attr(not(feature="profiling"), inline(always))] fn read_operands< T: Reader<::Address, ::Word>, S: DescriptionSink From 931ad9b84e75faf734ddff19b692481013260f6e Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 10:44:24 -0700 Subject: [PATCH 62/95] InstructionTextBuffer is only present with alloc (new crate flag) --- Cargo.toml | 8 +- src/long_mode/display.rs | 177 ++++++++++++++++++++------------------- src/long_mode/mod.rs | 4 +- test/long_mode/mod.rs | 3 + 4 files changed, 106 insertions(+), 86 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 34f8af7..ecc56be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,13 @@ lto = true default = ["std", "colors", "use-serde", "fmt"] # opt-in for some apis that are really much nicer with String -std = ["yaxpeax-arch/std"] +std = ["alloc", "yaxpeax-arch/std"] + +# opt-in for some formatting-related helpers that require performing allocation +# +# this should only be useful with `fmt` currently, but in the future there could +# be other `fmt`-independent code gated on `alloc`. 
+alloc = ["yaxpeax-arch/alloc"] # feature for formatting instructions and their components fmt = [] diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index e8000ed..9d9b7bb 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -4273,97 +4273,106 @@ impl<'a, F: DisplaySink> crate::long_mode::OperandVisitor for RelativeBranchPrin } } -/// helper to format `amd64` instructions with highest throughput and least configuration. this is -/// functionally a buffer for one x86 instruction's text. -/// -/// ### when to use this over `fmt::Display`? -/// -/// `fmt::Display` is a fair choice in most cases. in some cases, `InstructionTextBuffer` may -/// support formatting options that may be difficult to configure for a `Display` impl. -/// additionally, `InstructionTextBuffer` may be able to specialize more effectively where -/// `fmt::Display`, writing to a generic `fmt::Write`, may not. -/// -/// if your use case for `yaxpeax-x86` involves being bounded on the speed of disassembling and -/// formatting instructions, [`InstructionTextBuffer::format_inst`] has been measured as up to 11% -/// faster than an equivalent `write!(buf, "{}", inst)`. -/// -/// `InstructionTextBuffer` involves internal allocations; if your use case for `yaxpeax-x86` -/// requires allocations never occurring, it is not an appropriate tool. 
-/// -/// ### example -/// -/// ``` -/// use yaxpeax_x86::long_mode::InstDecoder; -/// use yaxpeax_x86::long_mode::InstructionTextBuffer; -/// use yaxpeax_x86::long_mode::DisplayStyle; -/// -/// let bytes = &[0x33, 0xc0]; -/// let inst = InstDecoder::default().decode_slice(bytes).expect("can decode"); -/// let mut text_buf = InstructionTextBuffer::new(); -/// assert_eq!( -/// text_buf.format_inst(&inst.display_with(DisplayStyle::Intel)).expect("can format"), -/// "xor eax, eax" -/// ); -/// -/// // or, getting the formatted instruction with `text_str`: -/// assert_eq!( -/// text_buf.text_str(), -/// "xor eax, eax" -/// ); -/// ``` -pub struct InstructionTextBuffer { - content: alloc::string::String, -} - -impl InstructionTextBuffer { - /// create an `InstructionTextBuffer` with default settings. `InstructionTextBuffer`'s default - /// settings format instructions identically to their corresponding `fmt::Display`. - pub fn new() -> Self { - let mut buf = alloc::string::String::new(); - // TODO: move 512 out to a MAX_INSTRUCTION_LEN const and appropriate justification (and - // fuzzing and ..) - buf.reserve(512); - Self { - content: buf, - } - } +#[cfg(feature="alloc")] +mod buffer_sink { + use core::fmt; + use super::super::{DisplayStyle, InstructionDisplayer}; + use super::{contextualize_c, contextualize_intel}; - /// format `inst` into this buffer. returns a borrow of that same internal buffer for convenience. + /// helper to format `amd64` instructions with highest throughput and least configuration. this is + /// functionally a buffer for one x86 instruction's text. + /// + /// ### when to use this over `fmt::Display`? + /// + /// `fmt::Display` is a fair choice in most cases. in some cases, `InstructionTextBuffer` may + /// support formatting options that may be difficult to configure for a `Display` impl. 
+ /// additionally, `InstructionTextBuffer` may be able to specialize more effectively where + /// `fmt::Display`, writing to a generic `fmt::Write`, may not. + /// + /// if your use case for `yaxpeax-x86` involves being bounded on the speed of disassembling and + /// formatting instructions, [`InstructionTextBuffer::format_inst`] has been measured as up to 11% + /// faster than an equivalent `write!(buf, "{}", inst)`. /// - /// this clears and reuses an internal buffer; if an instruction had been previously formatted - /// through this buffer, it will be overwritten. - pub fn format_inst<'buf, 'instr>(&'buf mut self, display: &InstructionDisplayer<'instr>) -> Result<&'buf str, fmt::Error> { - // Safety: this sink is used to format exactly one instruction and then dropped. it can - // never escape `format_inst`. - let mut handle = unsafe { self.write_handle() }; + /// `InstructionTextBuffer` involves internal allocations; if your use case for `yaxpeax-x86` + /// requires allocations never occurring, it is not an appropriate tool. 
+ /// + /// ### example + /// + /// ``` + /// use yaxpeax_x86::long_mode::InstDecoder; + /// use yaxpeax_x86::long_mode::InstructionTextBuffer; + /// use yaxpeax_x86::long_mode::DisplayStyle; + /// + /// let bytes = &[0x33, 0xc0]; + /// let inst = InstDecoder::default().decode_slice(bytes).expect("can decode"); + /// let mut text_buf = InstructionTextBuffer::new(); + /// assert_eq!( + /// text_buf.format_inst(&inst.display_with(DisplayStyle::Intel)).expect("can format"), + /// "xor eax, eax" + /// ); + /// + /// // or, getting the formatted instruction with `text_str`: + /// assert_eq!( + /// text_buf.text_str(), + /// "xor eax, eax" + /// ); + /// ``` + pub struct InstructionTextBuffer { + content: alloc::string::String, + } - match display.style { - DisplayStyle::Intel => { - contextualize_intel(&display.instr, &mut handle)?; - } - DisplayStyle::C => { - contextualize_c(&display.instr, &mut handle)?; + impl InstructionTextBuffer { + /// create an `InstructionTextBuffer` with default settings. `InstructionTextBuffer`'s default + /// settings format instructions identically to their corresponding `fmt::Display`. + pub fn new() -> Self { + let mut buf = alloc::string::String::new(); + // TODO: move 512 out to a MAX_INSTRUCTION_LEN const and appropriate justification (and + // fuzzing and ..) + buf.reserve(512); + Self { + content: buf, } } - Ok(self.text_str()) - } + /// format `inst` into this buffer. returns a borrow of that same internal buffer for convenience. + /// + /// this clears and reuses an internal buffer; if an instruction had been previously formatted + /// through this buffer, it will be overwritten. + pub fn format_inst<'buf, 'instr>(&'buf mut self, display: &InstructionDisplayer<'instr>) -> Result<&'buf str, fmt::Error> { + // Safety: this sink is used to format exactly one instruction and then dropped. it can + // never escape `format_inst`. + let mut handle = unsafe { self.write_handle() }; - /// return a borrow of the internal buffer. 
if an instruction has been formatted, the - /// returned `&str` contains that instruction's buffered text. - pub fn text_str(&self) -> &str { - self.content.as_str() - } + match display.style { + DisplayStyle::Intel => { + contextualize_intel(&display.instr, &mut handle)?; + } + DisplayStyle::C => { + contextualize_c(&display.instr, &mut handle)?; + } + } - /// do the necessary bookkeeping and provide an `InstructionTextSink` to write an instruction - /// into. - /// - /// SAFETY: callers must print at most one instruction into this handle. - unsafe fn write_handle(&mut self) -> yaxpeax_arch::display::InstructionTextSink { - self.content.clear(); - // Safety: `content` was just cleared, so writing begins at the start of the buffer. - // `content`is large enough to hold a fully-formatted instruction (see - // `InstructionTextBuffer::new`). - yaxpeax_arch::display::InstructionTextSink::new(&mut self.content) + Ok(self.text_str()) + } + + /// return a borrow of the internal buffer. if an instruction has been formatted, the + /// returned `&str` contains that instruction's buffered text. + pub fn text_str(&self) -> &str { + self.content.as_str() + } + + /// do the necessary bookkeeping and provide an `InstructionTextSink` to write an instruction + /// into. + /// + /// SAFETY: callers must print at most one instruction into this handle. + unsafe fn write_handle(&mut self) -> yaxpeax_arch::display::InstructionTextSink { + self.content.clear(); + // Safety: `content` was just cleared, so writing begins at the start of the buffer. + // `content`is large enough to hold a fully-formatted instruction (see + // `InstructionTextBuffer::new`). 
+ yaxpeax_arch::display::InstructionTextSink::new(&mut self.content) + } } } +#[cfg(feature="alloc")] +pub use buffer_sink::InstructionTextBuffer; diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index d66f59a..2ccbc22 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -7,7 +7,9 @@ pub mod uarch; pub use crate::MemoryAccessSize; #[cfg(feature = "fmt")] -pub use self::display::{DisplayStyle, InstructionDisplayer, InstructionTextBuffer}; +pub use self::display::{DisplayStyle, InstructionDisplayer}; +#[cfg(all(feature = "fmt", feature = "alloc"))] +pub use self::display::InstructionTextBuffer; use core::cmp::PartialEq; use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index 6c666e1..d8cb0ef 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -92,9 +92,12 @@ fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str text, ); + #[cfg(feature="alloc")] let mut formatter = yaxpeax_x86::long_mode::InstructionTextBuffer::new(); + #[cfg(feature="alloc")] let text3 = formatter.format_inst(&instr.display_with(yaxpeax_x86::long_mode::DisplayStyle::Intel)).expect("printing succeeds"); + #[cfg(feature="alloc")] assert!( text3 == text, "display error through InstructionTextBuffer for {}:\n decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n", From 42f9eed1f20ffa33ca72c18e6600d03483c5bc77 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 10:45:10 -0700 Subject: [PATCH 63/95] port opcode helpers and reordering to 32-bit and 16-bit decoders --- src/protected_mode/display.rs | 14 +++++++------- src/protected_mode/mod.rs | 28 ++++++++++++++++++++-------- src/real_mode/display.rs | 14 +++++++------- src/real_mode/mod.rs | 28 ++++++++++++++++++++-------- 4 files changed, 54 insertions(+), 30 deletions(-) diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 89b7565..ed5d902 100644 --- 
a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -376,6 +376,13 @@ const MNEMONICS: &[&'static str] = &[ "not", "xadd", "xchg", + "cmps", + "scas", + "movs", + "lods", + "stos", + "ins", + "outs", "invalid", "bt", "bsf", @@ -457,17 +464,10 @@ const MNEMONICS: &[&'static str] = &[ "cwd", "cdq", "cqo", - "lods", - "stos", "lahf", "sahf", - "cmps", - "scas", - "movs", "test", - "ins", "in", - "outs", "out", "imul", "jo", diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index cf72fda..482a921 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -968,7 +968,16 @@ pub enum Opcode { NOT = 0x1019, XADD = 0x101a, XCHG = 0x101b, - Invalid = 0x1c, + + CMPS = 0x201c, + SCAS = 0x201d, + MOVS = 0x201e, + LODS = 0x201f, + STOS = 0x2020, + INS = 0x2021, + OUTS = 0x2022, + + Invalid = 0x23, // XADD, BT, // BTS, @@ -1053,17 +1062,10 @@ pub enum Opcode { CWD, CDQ, CQO, - LODS, - STOS, LAHF, SAHF, - CMPS, - SCAS, - MOVS, TEST, - INS, IN, - OUTS, OUT, IMUL, JO, @@ -4217,6 +4219,16 @@ impl Opcode { _ => None, } } + + #[inline(always)] + fn can_lock(&self) -> bool { + (*self as u32) & 0x1000 != 0 + } + + #[inline(always)] + fn can_rep(&self) -> bool { + (*self as u32) & 0x2000 != 0 + } } impl Default for Instruction { diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index 6472c6c..9332011 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -376,6 +376,13 @@ const MNEMONICS: &[&'static str] = &[ "not", "xadd", "xchg", + "cmps", + "scas", + "movs", + "lods", + "stos", + "ins", + "outs", "invalid", "bt", "bsf", @@ -457,17 +464,10 @@ const MNEMONICS: &[&'static str] = &[ "cwd", "cdq", "cqo", - "lods", - "stos", "lahf", "sahf", - "cmps", - "scas", - "movs", "test", - "ins", "in", - "outs", "out", "imul", "jo", diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index b73f38a..87f2ff3 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -968,7 +968,16 @@ pub enum Opcode { NOT = 
0x1019, XADD = 0x101a, XCHG = 0x101b, - Invalid = 0x1c, + + CMPS = 0x201c, + SCAS = 0x201d, + MOVS = 0x201e, + LODS = 0x201f, + STOS = 0x2020, + INS = 0x2021, + OUTS = 0x2022, + + Invalid = 0x23, // XADD, BT, // BTS, @@ -1053,17 +1062,10 @@ pub enum Opcode { CWD, CDQ, CQO, - LODS, - STOS, LAHF, SAHF, - CMPS, - SCAS, - MOVS, TEST, - INS, IN, - OUTS, OUT, IMUL, JO, @@ -4217,6 +4219,16 @@ impl Opcode { _ => None, } } + + #[inline(always)] + fn can_lock(&self) -> bool { + (*self as u32) & 0x1000 != 0 + } + + #[inline(always)] + fn can_rep(&self) -> bool { + (*self as u32) & 0x2000 != 0 + } } impl Default for Instruction { From df67ba2b415d9157e4b56723c0b92695a4124ea0 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 10:48:30 -0700 Subject: [PATCH 64/95] actually use new can_rep in 32b and 16b modes --- src/protected_mode/display.rs | 2 +- src/real_mode/display.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index ed5d902..9cc949d 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -3376,7 +3376,7 @@ fn contextualize_intel(instr: &Instruction, colors: } if instr.prefixes.rep_any() { - if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&instr.opcode) { + if instr.opcode.can_rep() { if instr.prefixes.rep() { write!(out, "rep ")?; } else if instr.prefixes.repnz() { diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index 9332011..b06e3d2 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -3376,7 +3376,7 @@ fn contextualize_intel(instr: &Instruction, colors: } if instr.prefixes.rep_any() { - if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&instr.opcode) { + if instr.opcode.can_rep() { if instr.prefixes.rep() { write!(out, "rep ")?; } else if instr.prefixes.repnz() { From e6928294fd9eec219430a610ffe0ffb9d2cd7d23 Mon Sep 17 
00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 10:50:00 -0700 Subject: [PATCH 65/95] actually use new can_lock in 32b and 16b modes --- src/protected_mode/mod.rs | 2 +- src/real_mode/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 482a921..2403a8a 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -6662,7 +6662,7 @@ fn read_with_annotations< self.read_operands(decoder, words, instruction, record, sink)?; if self.check_lock { - if (instruction.opcode as u32) < 0x1000 || !instruction.operands[0].is_memory() { + if !instruction.opcode.can_lock() || !instruction.operands[0].is_memory() { return Err(DecodeError::InvalidPrefixes); } } diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index 87f2ff3..2a3723f 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -6663,7 +6663,7 @@ fn read_with_annotations< self.read_operands(decoder, words, instruction, record, sink)?; if self.check_lock { - if (instruction.opcode as u32) < 0x1000 || !instruction.operands[0].is_memory() { + if !instruction.opcode.can_lock() || !instruction.operands[0].is_memory() { return Err(DecodeError::InvalidPrefixes); } } From f951ccb3b97c88265b5a1978dacf462c1d6db697 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 11:56:40 -0700 Subject: [PATCH 66/95] centralize unsafe claims and better validate --- src/long_mode/display.rs | 175 ++++++++++++++++++++++++++++++--------- src/long_mode/mod.rs | 2 +- 2 files changed, 136 insertions(+), 41 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 9d9b7bb..bcea94a 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -8,6 +8,102 @@ use crate::long_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruc use yaxpeax_arch::display::DisplaySink; +trait DisplaySinkExt { + // `write_opcode` depends on all mnemonics being less than 32 bytes long. 
check that here, at + // compile time. referenced later to force evaluation of this const. + const MNEMONIC_LT_32: () = { + let mut i = 0; + while i < MNEMONICS.len() { + let name = &MNEMONICS[i]; + if name.len() >= 32 { + panic!("mnemonic too long"); + } + i += 1; + } + }; + + // `write_reg` depends on all register names being less than 8 bytes long. check that here, at + // compile time. referenced later to force evaluation of this const. + const REG_LABEL_LT_8: () = { + let mut i = 0; + while i < REG_NAMES.len() { + let name = &REG_NAMES[i]; + if name.len() >= 8 { + panic!("register name too long"); + } + i += 1; + } + }; + + // `write_mem_size_label` depends on all memory size labels being less than 8 bytes long. check + // that here, at compile time. referenced later to force evaluation of this const. + const MEM_SIZE_LABEL_LT_8: () = { + let mut i = 0; + while i < crate::MEM_SIZE_STRINGS.len() { + let name = &MEM_SIZE_STRINGS[i]; + if name.len() >= 8 { + panic!("memory label name too long"); + } + i += 1; + } + }; + + // `write_sae_mode` depends on all sae mode labels being less than 16 bytes long. check that + // here, at compile time. referenced later to force evaluation of this const.
+ const SAE_LABEL_LT_16: () = { + let mut i = 0; + while i < super::SAE_MODES.len() { + let mode = &super::SAE_MODES[i]; + if mode.label().len() >= 16 { + panic!("sae mode label too long"); + } + i += 1; + } + }; + + fn write_opcode(&mut self, opcode: super::Opcode) -> Result<(), core::fmt::Error>; + fn write_reg(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error>; + fn write_mem_size_label(&mut self, mem_size: u8) -> Result<(), core::fmt::Error>; + fn write_sae_mode(&mut self, sae: super::SaeMode) -> Result<(), core::fmt::Error>; +} + +impl DisplaySinkExt for T { + #[inline(always)] + fn write_opcode(&mut self, opcode: super::Opcode) -> Result<(), core::fmt::Error> { + let name = opcode.name(); + + let _ = Self::MNEMONIC_LT_32; + // Safety: all opcode mnemonics are 31 bytes or fewer. + unsafe { self.write_lt_32(name) } + } + + #[inline(always)] + fn write_reg(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error> { + let label = regspec_label(&reg); + + let _ = Self::REG_LABEL_LT_8; + // Safety: all register labels are 7 bytes or fewer. + unsafe { self.write_lt_8(label) } + } + + #[inline(always)] + fn write_mem_size_label(&mut self, mem_size: u8) -> Result<(), core::fmt::Error> { + let label = mem_size_label(mem_size); + let _ = Self::MEM_SIZE_LABEL_LT_8; + // Safety: all memory size labels are 7 bytes or fewer + unsafe { self.write_lt_8(label) } + } + + #[inline(always)] + fn write_sae_mode(&mut self, sae_mode: super::SaeMode) -> Result<(), core::fmt::Error> { + let label = sae_mode.label(); + + let _ = Self::SAE_LABEL_LT_16; + // Safety: all sae labels are 15 bytes or fewer.
+ unsafe { self.write_lt_16(label) } + } +} + impl fmt::Display for InstDecoder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { if self == &InstDecoder::default() { @@ -266,18 +362,18 @@ impl crate::long_mode::OperandVisitor for DisplayingOperandVisi #[cfg_attr(feature="profiling", inline(never))] fn visit_reg(&mut self, reg: RegSpec) -> Result { self.f.span_start_register(); - unsafe { self.f.write_lt_8(regspec_label(&reg))?; } + self.f.write_reg(reg)?; self.f.span_end_register(); Ok(()) } fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { self.f.span_start_register(); - unsafe { self.f.write_lt_8(regspec_label(&spec))?; } + self.f.write_reg(spec)?; self.f.span_end_register(); if mask.num != 0 { self.f.write_fixed_size("{")?; self.f.span_start_register(); - unsafe { self.f.write_lt_8(regspec_label(&mask))?; } + self.f.write_reg(mask)?; self.f.span_end_register(); self.f.write_fixed_size("}")?; } @@ -287,23 +383,23 @@ impl crate::long_mode::OperandVisitor for DisplayingOperandVisi Ok(()) } fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: crate::long_mode::SaeMode) -> Result { - unsafe { self.f.write_lt_8(regspec_label(&spec))?; } + self.f.write_reg(spec)?; if mask.num != 0 { self.f.write_fixed_size("{")?; - unsafe { self.f.write_lt_8(regspec_label(&mask))?; } + self.f.write_reg(mask)?; self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { self.f.write_fixed_size("{z}")?; } - unsafe { self.f.write_lt_16(sae_mode.label())?; } + self.f.write_sae_mode(sae_mode)?; Ok(()) } fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { - unsafe { self.f.write_lt_8(regspec_label(&spec))?; } + self.f.write_reg(spec)?; if mask.num != 0 { self.f.write_fixed_size("{")?; - unsafe { self.f.write_lt_8(regspec_label(&mask))?; } + self.f.write_reg(mask)?; self.f.write_fixed_size("}")?; } if let MergeMode::Zero
= merge_mode { @@ -330,7 +426,7 @@ impl crate::long_mode::OperandVisitor for DisplayingOperandVisi #[cfg_attr(feature="profiling", inline(never))] fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { self.f.write_char('[')?; - unsafe { self.f.write_lt_8(regspec_label(&reg))?; } + self.f.write_reg(reg)?; self.f.write_fixed_size(" ")?; { @@ -347,12 +443,12 @@ impl crate::long_mode::OperandVisitor for DisplayingOperandVisi } fn visit_deref(&mut self, reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - unsafe { self.f.write_lt_8(regspec_label(&reg))?; } + self.f.write_reg(reg)?; self.f.write_fixed_size("]") } fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { self.f.write_fixed_size("[")?; - unsafe { self.f.write_lt_8(regspec_label(&reg))?; } + self.f.write_reg(reg)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]")?; @@ -361,7 +457,7 @@ impl crate::long_mode::OperandVisitor for DisplayingOperandVisi } fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_fixed_size("[")?; - unsafe { self.f.write_lt_8(regspec_label(&reg))?; } + self.f.write_reg(reg)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size(" ")?; @@ -380,18 +476,18 @@ impl crate::long_mode::OperandVisitor for DisplayingOperandVisi } fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { self.f.write_fixed_size("[")?; - unsafe { self.f.write_lt_8(regspec_label(&base))?; } + self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; - unsafe { self.f.write_lt_8(regspec_label(&index))?; } + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]") } fn visit_index_base_scale_disp(&mut self,
base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_fixed_size("[")?; - unsafe { self.f.write_lt_8(regspec_label(&base))?; } + self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; - unsafe { self.f.write_lt_8(regspec_label(&index))?; } + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size(" ")?; @@ -410,7 +506,7 @@ impl crate::long_mode::OperandVisitor for DisplayingOperandVisi } fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_char('[')?; - unsafe { self.f.write_lt_8(regspec_label(&spec))?; } + self.f.write_reg(spec)?; self.f.write_char(' ')?; let mut v = disp as u32; if disp < 0 { @@ -422,33 +518,33 @@ impl crate::long_mode::OperandVisitor for DisplayingOperandVisi self.f.write_u32(v)?; self.f.write_char(']')?; self.f.write_char('{')?; - unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_reg(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - unsafe { self.f.write_lt_8(regspec_label(&spec))?; } + self.f.write_reg(spec)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; - unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_reg(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - unsafe { self.f.write_lt_8(regspec_label(&spec))?; } + self.f.write_reg(spec)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]")?; self.f.write_char('{')?; - unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_reg(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn 
visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - unsafe { self.f.write_lt_8(regspec_label(&spec))?; } + self.f.write_reg(spec)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size(" ")?; @@ -462,26 +558,26 @@ impl crate::long_mode::OperandVisitor for DisplayingOperandVisi self.f.write_u32(v)?; self.f.write_char(']')?; self.f.write_char('{')?; - unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_reg(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - unsafe { self.f.write_lt_8(regspec_label(&base))?; } + self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; - unsafe { self.f.write_lt_8(regspec_label(&index))?; } + self.f.write_reg(index)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; - unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_reg(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - unsafe { self.f.write_lt_8(regspec_label(&base))?; } + self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; - unsafe { self.f.write_lt_8(regspec_label(&index))?; } + self.f.write_reg(index)?; self.f.write_fixed_size(" ")?; let mut v = disp as u32; if disp < 0 { @@ -493,28 +589,28 @@ impl crate::long_mode::OperandVisitor for DisplayingOperandVisi self.f.write_u32(v)?; self.f.write_char(']')?; self.f.write_char('{')?; - unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_reg(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { 
self.f.write_fixed_size("[")?; - unsafe { self.f.write_lt_8(regspec_label(&base))?; } + self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; - unsafe { self.f.write_lt_8(regspec_label(&index))?; } + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]")?; self.f.write_char('{')?; - unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_reg(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - unsafe { self.f.write_lt_8(regspec_label(&base))?; } + self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; - unsafe { self.f.write_lt_8(regspec_label(&index))?; } + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_char(' ')?; @@ -528,7 +624,7 @@ impl crate::long_mode::OperandVisitor for DisplayingOperandVisi self.f.write_u32(v)?; self.f.write_char(']')?; self.f.write_char('{')?; - unsafe { self.f.write_lt_8(regspec_label(&mask_reg))?; } + self.f.write_reg(mask_reg)?; self.f.write_char('}')?; Ok(()) } @@ -3571,8 +3667,7 @@ pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut } } - // TODO: no x86 instruction longer than 32 bytes? - unsafe { out.write_lt_32(instr.opcode.name())? }; + out.write_opcode(instr.opcode)?; if instr.operand_count > 0 { out.write_fixed_size(" ")?; @@ -3585,7 +3680,7 @@ pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut } if instr.operands[0 as usize].is_memory() { - unsafe { out.write_lt_8(mem_size_label(instr.mem_size))? 
}; + out.write_mem_size_label(instr.mem_size)?; if let Some(prefix) = instr.segment_override_for_op(0) { let name = prefix.name(); out.write_char(' ')?; @@ -3611,7 +3706,7 @@ pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut } if instr.operands[i as usize].is_memory() { - unsafe { out.write_lt_8(mem_size_label(instr.mem_size))? }; + out.write_mem_size_label(instr.mem_size)?; if i >= 4 { unsafe { core::hint::unreachable_unchecked(); } } diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 2ccbc22..fab8fde 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -523,7 +523,7 @@ impl SaeMode { /// assert_eq!(SaeMode::RoundUp.label(), "{ru-sae}"); /// assert_eq!(SaeMode::RoundZero.label(), "{rz-sae}"); /// ``` - pub fn label(&self) -> &'static str { + pub const fn label(&self) -> &'static str { match self { SaeMode::RoundNearest => "{rne-sae}", SaeMode::RoundDown => "{rd-sae}", From 22a7e97f2119d1a3facf6b1f2d95f0ed1ee10bee Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 12:30:16 -0700 Subject: [PATCH 67/95] adapt OperandVisitor to protected mode too --- src/protected_mode/display.rs | 646 ++++++++++++++++++++++++---------- src/protected_mode/mod.rs | 85 ++++- 2 files changed, 538 insertions(+), 193 deletions(-) diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 9cc949d..635893c 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -7,6 +7,104 @@ use yaxpeax_arch::display::*; use crate::MEM_SIZE_STRINGS; use crate::protected_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixVex, OperandSpec}; +use yaxpeax_arch::display::DisplaySink; + +trait DisplaySinkExt { + // `write_opcode` depends on all mnemonics being less than 32 bytes long. check that here, at + // compile time. referenced later to force evaluation of this const. 
+ const MNEMONIC_LT_32: () = { + let mut i = 0; + while i < MNEMONICS.len() { + let name = &MNEMONICS[i]; + if name.len() >= 32 { + panic!("mnemonic too long"); + } + i += 1; + } + }; + + // `write_reg` depends on all register names being less than 8 bytes long. check that here, at + // compile time. referenced later to force evaluation of this const. + const REG_LABEL_LT_8: () = { + let mut i = 0; + while i < REG_NAMES.len() { + let name = &REG_NAMES[i]; + if name.len() >= 8 { + panic!("register name too long"); + } + i += 1; + } + }; + + // `write_mem_size_label` depends on all memory size labels being less than 8 bytes long. check + // that here, at compile time. referenced later to force evaluation of this const. + const MEM_SIZE_LABEL_LT_8: () = { + let mut i = 0; + while i < crate::MEM_SIZE_STRINGS.len() { + let name = &MEM_SIZE_STRINGS[i]; + if name.len() >= 8 { + panic!("memory label name too long"); + } + i += 1; + } + }; + + // `write_sae_mode` depends on all sae mode labels being less than 16 bytes long. check that + // here, at compile time. referenced later to force evaluation of this const. + const SAE_LABEL_LT_16: () = { + let mut i = 0; + while i < super::SAE_MODES.len() { + let mode = &super::SAE_MODES[i]; + if mode.label().len() >= 16 { + panic!("sae mode label too long"); + } + i += 1; + } + }; + + fn write_opcode(&mut self, opcode: super::Opcode) -> Result<(), core::fmt::Error>; + fn write_reg(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error>; + fn write_mem_size_label(&mut self, mem_size: u8) -> Result<(), core::fmt::Error>; + fn write_sae_mode(&mut self, sae: super::SaeMode) -> Result<(), core::fmt::Error>; +} + +impl DisplaySinkExt for T { + #[inline(always)] + fn write_opcode(&mut self, opcode: super::Opcode) -> Result<(), core::fmt::Error> { + let name = opcode.name(); + + let _ = Self::MNEMONIC_LT_32; + // Safety: all opcode mnemonics are 31 bytes or fewer.
+ unsafe { self.write_lt_32(name) } + } + + #[inline(always)] + fn write_reg(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error> { + let label = regspec_label(&reg); + + let _ = Self::REG_LABEL_LT_8; + // Safety: all register labels are 7 bytes or fewer. + unsafe { self.write_lt_8(label) } + } + + #[inline(always)] + fn write_mem_size_label(&mut self, mem_size: u8) -> Result<(), core::fmt::Error> { + let label = mem_size_label(mem_size); + let _ = Self::MEM_SIZE_LABEL_LT_8; + // Safety: all memory size labels are 7 bytes or fewer + unsafe { self.write_lt_8(label) } + } + + #[inline(always)] + fn write_sae_mode(&mut self, sae_mode: super::SaeMode) -> Result<(), core::fmt::Error> { + let label = sae_mode.label(); + + let _ = Self::SAE_LABEL_LT_16; + // Safety: all sae labels are 15 bytes or fewer. + unsafe { self.write_lt_16(label) } + } +} + impl fmt::Display for InstDecoder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { if self == &InstDecoder::default() { @@ -129,6 +227,10 @@ pub(crate) fn regspec_label(spec: &RegSpec) -> &'static str { unsafe { REG_NAMES.get_kinda_unchecked((spec.num as u16 + ((spec.bank as u16) << 3)) as usize) } } +pub(crate) fn mem_size_label(size: u8) -> &'static str { + unsafe { MEM_SIZE_STRINGS.get_kinda_unchecked(size as usize) } +} + impl fmt::Display for RegSpec { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(regspec_label(self)) @@ -143,201 +245,363 @@ impl fmt::Display for Operand { impl Colorize for Operand { fn colorize(&self, colors: &Y, f: &mut T) -> fmt::Result { - match self { - &Operand::ImmediateU8(imm) => { - write!(f, "{}", colors.number(u8_hex(imm))) - } - &Operand::ImmediateI8(imm) => { - write!(f, "{}", - colors.number(signed_i8_hex(imm))) - }, - &Operand::ImmediateU16(imm) => { - write!(f, "{}", colors.number(u16_hex(imm))) - } - &Operand::ImmediateI16(imm) => { - write!(f, "{}", - colors.number(signed_i16_hex(imm))) - }, - &Operand::ImmediateU32(imm) => { - write!(f, "{}",
colors.number(u32_hex(imm))) - } - &Operand::ImmediateI32(imm) => { - write!(f, "{}", - colors.number(signed_i32_hex(imm))) - }, - &Operand::AbsoluteFarAddress { segment, address } => { - write!(f, "{}:{}", - colors.number(u16_hex(segment as u16)), - colors.number(u32_hex(address as u32)), - ) - }, - &Operand::Register(ref spec) => { - f.write_str(regspec_label(spec)) - } - &Operand::RegisterMaskMerge(ref spec, ref mask, merge_mode) => { - f.write_str(regspec_label(spec))?; - if mask.num != 0 { - f.write_str("{")?; - f.write_str(regspec_label(mask))?; - f.write_str("}")?; - } - if let MergeMode::Zero = merge_mode { - f.write_str("{z}")?; - } - Ok(()) - } - &Operand::RegisterMaskMergeSae(ref spec, ref mask, merge_mode, sae_mode) => { - f.write_str(regspec_label(spec))?; - if mask.num != 0 { - f.write_str("{")?; - f.write_str(regspec_label(mask))?; - f.write_str("}")?; - } - if let MergeMode::Zero = merge_mode { - f.write_str("{z}")?; - } - f.write_str(sae_mode.label())?; - Ok(()) - } - &Operand::RegisterMaskMergeSaeNoround(ref spec, ref mask, merge_mode) => { - f.write_str(regspec_label(spec))?; - if mask.num != 0 { - f.write_str("{")?; - f.write_str(regspec_label(mask))?; - f.write_str("}")?; - } - if let MergeMode::Zero = merge_mode { - f.write_str("{z}")?; - } - f.write_str("{sae}")?; - Ok(()) - } - &Operand::DisplacementU16(imm) => { - write!(f, "[{}]", colors.address(u16_hex(imm))) - } - &Operand::DisplacementU32(imm) => { - write!(f, "[{}]", colors.address(u32_hex(imm))) - } - &Operand::RegDisp(ref spec, disp) => { - write!(f, "[{} ", regspec_label(spec))?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]") - }, - &Operand::RegDeref(ref spec) => { - f.write_str("[")?; - f.write_str(regspec_label(spec))?; - f.write_str("]") - }, - &Operand::RegScale(ref spec, scale) => { - write!(f, "[{} * {}]", - regspec_label(spec), - colors.number(scale) - ) - }, - &Operand::RegScaleDisp(ref spec, scale, disp) => { - write!(f, 
"[{} * {} ", - regspec_label(spec), - colors.number(scale), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]") - }, - &Operand::RegIndexBase(ref base, ref index) => { - f.write_str("[")?; - f.write_str(regspec_label(base))?; - f.write_str(" + ")?; - f.write_str(regspec_label(index))?; - f.write_str("]") - } - &Operand::RegIndexBaseDisp(ref base, ref index, disp) => { - write!(f, "[{} + {} ", - regspec_label(base), - regspec_label(index), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]") - }, - &Operand::RegIndexBaseScale(ref base, ref index, scale) => { - write!(f, "[{} + {} * {}]", - regspec_label(base), - regspec_label(index), - colors.number(scale) - ) + let mut f = yaxpeax_arch::display::FmtSink::new(f); + let mut visitor = DisplayingOperandVisitor { + f: &mut f + }; + self.visit(&mut visitor) + } +} + +struct DisplayingOperandVisitor<'a, T> { + f: &'a mut T, +} + +impl crate::protected_mode::OperandVisitor for DisplayingOperandVisitor<'_, T> { + type Ok = (); + type Error = core::fmt::Error; + + #[cfg_attr(feature="profiling", inline(never))] + fn visit_u8(&mut self, imm: u8) -> Result { + self.f.span_start_immediate(); + self.f.write_fixed_size("0x")?; + self.f.write_u8(imm)?; + self.f.span_end_immediate(); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i8(&mut self, imm: i8) -> Result { + self.f.span_start_immediate(); + let mut v = imm as u8; + if imm < 0 { + self.f.write_char('-')?; + v = -imm as u8; + } + self.f.write_fixed_size("0x")?; + self.f.write_u8(v)?; + self.f.span_end_immediate(); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_u16(&mut self, imm: u16) -> Result { + self.f.span_start_immediate(); + self.f.write_fixed_size("0x")?; + self.f.write_u16(imm)?; + self.f.span_end_immediate(); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i16(&mut self, imm: i16) -> 
Result { + self.f.span_start_immediate(); + let mut v = imm as u16; + if imm < 0 { + self.f.write_char('-')?; + v = -imm as u16; + } + self.f.write_fixed_size("0x")?; + self.f.write_u16(v)?; + self.f.span_end_immediate(); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_u32(&mut self, imm: u32) -> Result { + self.f.span_start_immediate(); + self.f.write_fixed_size("0x")?; + self.f.write_u32(imm)?; + self.f.span_end_immediate(); + Ok(()) + } + fn visit_i32(&mut self, imm: i32) -> Result { + self.f.span_start_immediate(); + let mut v = imm as u32; + if imm < 0 { + self.f.write_char('-')?; + v = -imm as u32; + } + self.f.write_fixed_size("0x")?; + self.f.write_u32(v)?; + self.f.span_end_immediate(); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_reg(&mut self, reg: RegSpec) -> Result { + self.f.span_start_register(); + self.f.write_reg(reg)?; + self.f.span_end_register(); + Ok(()) + } + fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { + self.f.span_start_register(); + self.f.write_reg(spec)?; + self.f.span_end_register(); + if mask.num != 0 { + self.f.write_fixed_size("{")?; + self.f.span_start_register(); + self.f.write_reg(mask)?; + self.f.span_end_register(); + self.f.write_fixed_size("}")?; + } + if let MergeMode::Zero = merge_mode { + self.f.write_fixed_size("{z}")?; + } + Ok(()) + } + fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: super::SaeMode) -> Result { + self.f.write_reg(spec)?; + if mask.num != 0 { + self.f.write_fixed_size("{")?; + self.f.write_reg(mask)?; + self.f.write_fixed_size("}")?; + } + if let MergeMode::Zero = merge_mode { + self.f.write_fixed_size("{z}")?; + } + self.f.write_sae_mode(sae_mode)?; + Ok(()) + } + fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { + self.f.write_reg(spec)?; + if mask.num != 0 { + 
self.f.write_fixed_size("{")?; + self.f.write_reg(mask)?; + self.f.write_fixed_size("}")?; + } + if let MergeMode::Zero = merge_mode { + self.f.write_fixed_size("{z}")?; + } + self.f.write_fixed_size("{sae}")?; + Ok(()) + } + fn visit_abs_u16(&mut self, imm: u16) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_fixed_size("0x")?; + self.f.write_u16(imm)?; + self.f.write_fixed_size("]")?; + Ok(()) + } + fn visit_abs_u32(&mut self, imm: u32) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_fixed_size("0x")?; + self.f.write_u32(imm)?; + self.f.write_fixed_size("]")?; + Ok(()) + } + #[cfg_attr(not(feature="profiling"), inline(always))] + #[cfg_attr(feature="profiling", inline(never))] + fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { + self.f.write_char('[')?; + self.f.write_reg(reg)?; + self.f.write_fixed_size(" ")?; + + { + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; } - &Operand::RegIndexBaseScaleDisp(ref base, ref index, scale, disp) => { - write!(f, "[{} + {} * {} ", - regspec_label(base), - regspec_label(index), - colors.number(scale), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]") - }, - &Operand::RegDispMasked(ref spec, disp, ref mask_reg) => { - write!(f, "[{} ", regspec_label(spec))?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegDerefMasked(ref spec, ref mask_reg) => { - f.write_str("[")?; - f.write_str(regspec_label(spec))?; - f.write_str("]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegScaleMasked(ref spec, scale, ref mask_reg) => { - write!(f, "[{} * {}]", - regspec_label(spec), - colors.number(scale) - )?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegScaleDispMasked(ref spec, scale, disp, ref mask_reg) 
=> { - write!(f, "[{} * {} ", - regspec_label(spec), - colors.number(scale), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegIndexBaseMasked(ref base, ref index, ref mask_reg) => { - f.write_str("[")?; - f.write_str(regspec_label(base))?; - f.write_str(" + ")?; - f.write_str(regspec_label(index))?; - f.write_str("]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) + self.f.write_u32(v)?; + } + self.f.write_fixed_size("]") + } + fn visit_deref(&mut self, reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(reg)?; + self.f.write_fixed_size("]") + } + fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(reg)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size("]")?; + + Ok(()) + } + fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(reg)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size(" ")?; + + { + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; } - &Operand::RegIndexBaseDispMasked(ref base, ref index, disp, ref mask_reg) => { - write!(f, "[{} + {} ", - regspec_label(base), - regspec_label(index), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegIndexBaseScaleMasked(ref base, ref index, scale, ref mask_reg) => { - write!(f, "[{} + {} * {}]", - regspec_label(base), - regspec_label(index), - colors.number(scale) - )?; - write!(f, "{{{}}}", 
regspec_label(mask_reg)) + self.f.write_u32(v)?; + } + self.f.write_char(']') + } + fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(base)?; + self.f.write_fixed_size(" + ")?; + self.f.write_reg(index)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size("]") + } + fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(base)?; + self.f.write_fixed_size(" + ")?; + self.f.write_reg(index)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size(" ")?; + + { + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; } - &Operand::RegIndexBaseScaleDispMasked(ref base, ref index, scale, disp, ref mask_reg) => { - write!(f, "[{} + {} * {} ", - regspec_label(base), - regspec_label(index), - colors.number(scale), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::Nothing => { Ok(()) }, + self.f.write_u32(v)?; + } + self.f.write_fixed_size("]") + } + fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + self.f.write_char('[')?; + self.f.write_reg(spec)?; + self.f.write_char(' ')?; + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; + } + self.f.write_u32(v)?; + self.f.write_char(']')?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: 
RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(spec)?; + self.f.write_fixed_size("]")?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(spec)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size("]")?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(spec)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size(" ")?; + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; + } + self.f.write_u32(v)?; + self.f.write_char(']')?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(base)?; + self.f.write_fixed_size(" + ")?; + self.f.write_reg(index)?; + self.f.write_fixed_size("]")?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(base)?; + self.f.write_fixed_size(" + ")?; + self.f.write_reg(index)?; + self.f.write_fixed_size(" ")?; + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } 
else { + self.f.write_fixed_size("+ 0x")?; + } + self.f.write_u32(v)?; + self.f.write_char(']')?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(base)?; + self.f.write_fixed_size(" + ")?; + self.f.write_reg(index)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size("]")?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(base)?; + self.f.write_fixed_size(" + ")?; + self.f.write_reg(index)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_char(' ')?; + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; } + self.f.write_u32(v)?; + self.f.write_char(']')?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_absolute_far_address(&mut self, segment: u16, address: u32) -> Result { + self.f.write_prefixed_u16(segment)?; + self.f.write_fixed_size(":")?; + self.f.write_prefixed_u32(address)?; + Ok(()) + } + + + fn visit_other(&mut self) -> Result { + Ok(()) } } diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 2403a8a..b0a3f62 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -450,14 +450,14 @@ impl SaeMode { /// a human-friendly label for this `SaeMode`: /// /// ``` - /// use yaxpeax_x86::long_mode::SaeMode; + /// use 
yaxpeax_x86::protected_mode::SaeMode; /// /// assert_eq!(SaeMode::RoundNearest.label(), "{rne-sae}"); /// assert_eq!(SaeMode::RoundDown.label(), "{rd-sae}"); /// assert_eq!(SaeMode::RoundUp.label(), "{ru-sae}"); /// assert_eq!(SaeMode::RoundZero.label(), "{rz-sae}"); /// ``` - pub fn label(&self) -> &'static str { + pub const fn label(&self) -> &'static str { match self { SaeMode::RoundNearest => "{rne-sae}", SaeMode::RoundDown => "{rd-sae}", @@ -477,6 +477,42 @@ impl SaeMode { SAE_MODES[idx] } } + +pub trait OperandVisitor { + type Ok; + type Error; + + fn visit_reg(&mut self, reg: RegSpec) -> Result; + fn visit_deref(&mut self, reg: RegSpec) -> Result; + fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result; + fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result; + fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result; + fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result; + fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result; + fn visit_i8(&mut self, imm: i8) -> Result; + fn visit_u8(&mut self, imm: u8) -> Result; + fn visit_i16(&mut self, imm: i16) -> Result; + fn visit_u16(&mut self, imm: u16) -> Result; + fn visit_i32(&mut self, imm: i32) -> Result; + fn visit_u32(&mut self, imm: u32) -> Result; + fn visit_abs_u16(&mut self, imm: u16) -> Result; + fn visit_abs_u32(&mut self, imm: u32) -> Result; + fn visit_reg_mask_merge(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result; + fn visit_reg_mask_merge_sae(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: SaeMode) -> Result; + fn visit_reg_mask_merge_sae_noround(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result; + fn visit_reg_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_reg_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result; + fn 
visit_reg_scale_masked(&mut self, base: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; + fn visit_reg_scale_disp_masked(&mut self, base: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result; + fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; + fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_absolute_far_address(&mut self, segment: u16, address: u32) -> Result; + + fn visit_other(&mut self) -> Result; +} + impl Operand { fn from_spec(inst: &Instruction, spec: OperandSpec) -> Operand { match spec { @@ -700,6 +736,51 @@ impl Operand { } } } + + /// provided for parity with [`Instruction::visit_operand`]. this has little utility other than + /// to reuse an `OperandVisitor` on an `Operand` directly. 
+ pub fn visit(&self, visitor: &mut T) -> Result { + match self { + Operand::Nothing => { + visitor.visit_other() + } + Operand::Register(reg) => { + visitor.visit_reg(*reg) + } + Operand::RegDeref(reg) => { + visitor.visit_deref(*reg) + } + Operand::RegDisp(reg, disp) => { + visitor.visit_disp(*reg, *disp) + } + Operand::ImmediateI8(imm) => visitor.visit_i8(*imm), + Operand::ImmediateU8(imm) => visitor.visit_u8(*imm), + Operand::ImmediateI16(imm) => visitor.visit_i16(*imm), + Operand::ImmediateU16(imm) => visitor.visit_u16(*imm), + Operand::ImmediateI32(imm) => visitor.visit_i32(*imm), + Operand::ImmediateU32(imm) => visitor.visit_u32(*imm), + Operand::DisplacementU16(disp) => visitor.visit_abs_u16(*disp), + Operand::DisplacementU32(disp) => visitor.visit_abs_u32(*disp), + Operand::RegScale(reg, scale) => visitor.visit_reg_scale(*reg, *scale), + Operand::RegScaleDisp(reg, scale, disp) => visitor.visit_reg_scale_disp(*reg, *scale, *disp), + Operand::RegIndexBase(_, _) => { /* not actually reachable anymore */ visitor.visit_other() }, + Operand::RegIndexBaseDisp(_, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, + Operand::RegIndexBaseScale(base, index, scale) => visitor.visit_index_base_scale(*base, *index, *scale), + Operand::RegIndexBaseScaleDisp(base, index, scale, disp) => visitor.visit_index_base_scale_disp(*base, *index, *scale, *disp), + Operand::RegisterMaskMerge(reg, mask, merge) => visitor.visit_reg_mask_merge(*reg, *mask, *merge), + Operand::RegisterMaskMergeSae(reg, mask, merge, sae) => visitor.visit_reg_mask_merge_sae(*reg, *mask, *merge, *sae), + Operand::RegisterMaskMergeSaeNoround(reg, mask, merge) => visitor.visit_reg_mask_merge_sae_noround(*reg, *mask, *merge), + Operand::RegDerefMasked(reg, mask) => visitor.visit_reg_deref_masked(*reg, *mask), + Operand::RegDispMasked(reg, disp, mask) => visitor.visit_reg_disp_masked(*reg, *disp, *mask), + Operand::RegScaleMasked(reg, scale, mask) => visitor.visit_reg_scale_masked(*reg, 
*scale, *mask), + Operand::RegIndexBaseMasked(_, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, + Operand::RegIndexBaseDispMasked(_, _, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, + Operand::RegScaleDispMasked(base, scale, disp, mask) => visitor.visit_reg_scale_disp_masked(*base, *scale, *disp, *mask), + Operand::RegIndexBaseScaleMasked(base, index, scale, mask) => visitor.visit_index_base_scale_masked(*base, *index, *scale, *mask), + Operand::RegIndexBaseScaleDispMasked(base, index, scale, disp, mask) => visitor.visit_index_base_scale_disp_masked(*base, *index, *scale, *disp, *mask), + Operand::AbsoluteFarAddress { segment, address } => visitor.visit_absolute_far_address(*segment, *address), + } + } } #[test] From eec945bb991989a9dfcef2d5334746ba863e0abc Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 12:49:16 -0700 Subject: [PATCH 68/95] fix AbsoluteFarAddress being tagged as a memory operand --- src/protected_mode/mod.rs | 2 +- src/real_mode/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index b0a3f62..7c7a4c0 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -2676,7 +2676,7 @@ enum OperandSpec { RegIndexBaseScale_mask = 0xd6, RegIndexBaseScaleDisp_mask = 0xd7, // u16:u{16,32} immediate address for a far call - AbsoluteFarAddress = 0x98, + AbsoluteFarAddress = 0x18, } // the Hash, Eq, and PartialEq impls here are possibly misleading. diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index 2a3723f..8dd6741 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -2595,7 +2595,7 @@ enum OperandSpec { RegIndexBaseScale_mask = 0xd6, RegIndexBaseScaleDisp_mask = 0xd7, // u16:u{16,32} immediate address for a far call - AbsoluteFarAddress = 0x98, + AbsoluteFarAddress = 0x18, } // the Hash, Eq, and PartialEq impls here are possibly misleading. 
From b1213132953ff18efaa126e0b3f9f22e7ea44ed3 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 12:53:41 -0700 Subject: [PATCH 69/95] adapt the rest of formating changes to protected_mode --- src/long_mode/display.rs | 6 +- src/protected_mode/display.rs | 469 ++++++++++++++++++++++------------ src/protected_mode/mod.rs | 165 ++++++++++++ 3 files changed, 480 insertions(+), 160 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index bcea94a..869ac0b 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -272,7 +272,7 @@ struct DisplayingOperandVisitor<'a, T> { f: &'a mut T, } -impl crate::long_mode::OperandVisitor for DisplayingOperandVisitor<'_, T> { +impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> { type Ok = (); type Error = core::fmt::Error; @@ -4237,7 +4237,7 @@ struct RelativeBranchPrinter<'a, F: DisplaySink> { out: &'a mut F, } -impl<'a, F: DisplaySink> crate::long_mode::OperandVisitor for RelativeBranchPrinter<'a, F> { +impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> { // return true if we printed a relative branch offset, false otherwise type Ok = bool; // but errors are errors @@ -4336,7 +4336,7 @@ impl<'a, F: DisplaySink> crate::long_mode::OperandVisitor for RelativeBranchPrin fn visit_reg_mask_merge(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode) -> Result { Ok(false) } - fn visit_reg_mask_merge_sae(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode, _sae_mode: crate::long_mode::SaeMode) -> Result { + fn visit_reg_mask_merge_sae(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode, _sae_mode: super::SaeMode) -> Result { Ok(false) } fn visit_reg_mask_merge_sae_noround(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode) -> Result { diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 635893c..46bb185 100644 --- a/src/protected_mode/display.rs +++ 
b/src/protected_mode/display.rs @@ -187,6 +187,19 @@ impl fmt::Display for PrefixVex { } } +impl Segment { + fn name(&self) -> &'static [u8; 2] { + match self { + Segment::CS => b"cs", + Segment::DS => b"ds", + Segment::ES => b"es", + Segment::FS => b"fs", + Segment::GS => b"gs", + Segment::SS => b"ss", + } + } +} + impl fmt::Display for Segment { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -257,7 +270,7 @@ struct DisplayingOperandVisitor<'a, T> { f: &'a mut T, } -impl crate::protected_mode::OperandVisitor for DisplayingOperandVisitor<'_, T> { +impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> { type Ok = (); type Error = core::fmt::Error; @@ -3628,155 +3641,140 @@ impl Instruction { } } -fn contextualize_intel(instr: &Instruction, colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { +pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt::Result { if instr.xacquire() { - out.write_str("xacquire ")?; + out.write_fixed_size("xacquire ")?; } if instr.xrelease() { - out.write_str("xrelease ")?; + out.write_fixed_size("xrelease ")?; } if instr.prefixes.lock() { - out.write_str("lock ")?; + out.write_fixed_size("lock ")?; } if instr.prefixes.rep_any() { if instr.opcode.can_rep() { if instr.prefixes.rep() { - write!(out, "rep ")?; + out.write_fixed_size("rep ")?; } else if instr.prefixes.repnz() { - write!(out, "repnz ")?; + out.write_fixed_size("repnz ")?; } } } - out.write_str(instr.opcode.name())?; - - if instr.opcode == Opcode::XBEGIN { - if (instr.imm as i32) >= 0 { - return write!(out, " $+{}", colors.number(signed_i32_hex(instr.imm as i32))); - } else { - return write!(out, " ${}", colors.number(signed_i32_hex(instr.imm as i32))); - } - } + out.write_opcode(instr.opcode)?; if instr.operand_count > 0 { - out.write_str(" ")?; - - let x = Operand::from_spec(instr, instr.operands[0]); - - const RELATIVE_BRANCHES: [Opcode; 21] = [ - Opcode::JMP, Opcode::JECXZ, - Opcode::LOOP, 
Opcode::LOOPZ, Opcode::LOOPNZ, - Opcode::JO, Opcode::JNO, - Opcode::JB, Opcode::JNB, - Opcode::JZ, Opcode::JNZ, - Opcode::JNA, Opcode::JA, - Opcode::JS, Opcode::JNS, - Opcode::JP, Opcode::JNP, - Opcode::JL, Opcode::JGE, - Opcode::JLE, Opcode::JG, - ]; - - if instr.operands[0] == OperandSpec::ImmI8 || instr.operands[0] == OperandSpec::ImmI32 { - if RELATIVE_BRANCHES.contains(&instr.opcode) { - return match x { - Operand::ImmediateI8(rel) => { - if rel >= 0 { - write!(out, "$+{}", colors.number(signed_i32_hex(rel as i32))) - } else { - write!(out, "${}", colors.number(signed_i32_hex(rel as i32))) - } - } - Operand::ImmediateI32(rel) => { - if rel >= 0 { - write!(out, "$+{}", colors.number(signed_i32_hex(rel))) - } else { - write!(out, "${}", colors.number(signed_i32_hex(rel))) - } - } - _ => { unreachable!() } - }; - } - } + out.write_fixed_size(" ")?; - if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; - out.write_str(" ")?; + if instr.visit_operand(0, &mut RelativeBranchPrinter { + inst: instr, + out, + })? 
{ + return Ok(()); } - if let Some(prefix) = instr.segment_override_for_op(0) { - write!(out, "{}:", prefix)?; + if instr.operands[0 as usize].is_memory() { + out.write_mem_size_label(instr.mem_size)?; + if let Some(prefix) = instr.segment_override_for_op(0) { + let name = prefix.name(); + out.write_char(' ')?; + out.write_char(name[0] as char)?; + out.write_char(name[1] as char)?; + out.write_fixed_size(":")?; + } else { + out.write_fixed_size(" ")?; + } } - x.colorize(colors, out)?; + + let mut displayer = DisplayingOperandVisitor { + f: out, + }; + instr.visit_operand(0 as u8, &mut displayer)?; for i in 1..instr.operand_count { - match instr.opcode { - _ => { - match &instr.operands[i as usize] { - &OperandSpec::Nothing => { - return Ok(()); - }, - _ => { - out.write_str(", ")?; - let x = Operand::from_spec(instr, instr.operands[i as usize]); - if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; - out.write_str(" ")?; + // don't worry about checking for `instr.operands[i] != Nothing`, it would be a bug to + // reach that while iterating only to `operand_count`.. 
+ out.write_fixed_size(", ")?; + if i >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } + + if instr.operands[i as usize].is_memory() { + out.write_mem_size_label(instr.mem_size)?; + if i >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } + if let Some(prefix) = instr.segment_override_for_op(i) { + let name = prefix.name(); + out.write_char(' ')?; + out.write_char(name[0] as char)?; + out.write_char(name[1] as char)?; + out.write_fixed_size(":")?; + } else { + out.write_fixed_size(" ")?; + } + } + + let mut displayer = DisplayingOperandVisitor { + f: out, + }; + + instr.visit_operand(i as u8, &mut displayer)?; + if let Some(evex) = instr.prefixes.evex() { + if evex.broadcast() && instr.operands[i as usize].is_memory() { + let scale = if instr.opcode == Opcode::VCVTPD2PS || instr.opcode == Opcode::VCVTTPD2UDQ || instr.opcode == Opcode::VCVTPD2UDQ || instr.opcode == Opcode::VCVTUDQ2PD || instr.opcode == Opcode::VCVTPS2PD || instr.opcode == Opcode::VCVTQQ2PS || instr.opcode == Opcode::VCVTDQ2PD || instr.opcode == Opcode::VCVTTPD2DQ || instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VFPCLASSPD || instr.opcode == Opcode::VCVTNEPS2BF16 || instr.opcode == Opcode::VCVTUQQ2PS || instr.opcode == Opcode::VCVTPD2DQ || instr.opcode == Opcode::VCVTTPS2UQQ || instr.opcode == Opcode::VCVTPS2UQQ || instr.opcode == Opcode::VCVTTPS2QQ || instr.opcode == Opcode::VCVTPS2QQ { + if instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VCVTNEPS2BF16 { + if evex.vex().l() { + 8 + } else if evex.lp() { + 16 + } else { + 4 } - if let Some(prefix) = instr.segment_override_for_op(i) { - write!(out, "{}:", prefix)?; + } else if instr.opcode == Opcode::VFPCLASSPD { + if evex.vex().l() { + 4 + } else if evex.lp() { + 8 + } else { + 2 } - x.colorize(colors, out)?; - if let Some(evex) = instr.prefixes.evex() { - if evex.broadcast() && x.is_memory() { - let scale = if instr.opcode == Opcode::VCVTPD2PS || instr.opcode == Opcode::VCVTTPD2UDQ || instr.opcode 
== Opcode::VCVTPD2UDQ || instr.opcode == Opcode::VCVTUDQ2PD || instr.opcode == Opcode::VCVTPS2PD || instr.opcode == Opcode::VCVTQQ2PS || instr.opcode == Opcode::VCVTDQ2PD || instr.opcode == Opcode::VCVTTPD2DQ || instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VFPCLASSPD || instr.opcode == Opcode::VCVTNEPS2BF16 || instr.opcode == Opcode::VCVTUQQ2PS || instr.opcode == Opcode::VCVTPD2DQ || instr.opcode == Opcode::VCVTTPS2UQQ || instr.opcode == Opcode::VCVTPS2UQQ || instr.opcode == Opcode::VCVTTPS2QQ || instr.opcode == Opcode::VCVTPS2QQ { - if instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VCVTNEPS2BF16 { - if evex.vex().l() { - 8 - } else if evex.lp() { - 16 - } else { - 4 - } - } else if instr.opcode == Opcode::VFPCLASSPD { - if evex.vex().l() { - 4 - } else if evex.lp() { - 8 - } else { - 2 - } - } else { - // vcvtpd2ps is "cool": in broadcast mode, it can read a - // double-precision float (qword), resize to single-precision, - // then broadcast that to the whole destination register. this - // means we need to show `xmm, qword [addr]{1to4}` if vector - // size is 256. likewise, scale of 8 for the same truncation - // reason if vector size is 512. - // vcvtudq2pd is the same story. - // vfpclassp{s,d} is a mystery to me. - if evex.vex().l() { - 4 - } else if evex.lp() { - 8 - } else { - 2 - } - } - } else { - // this should never be `None` - that would imply two - // memory operands for a broadcasted operation. - if let Some(width) = Operand::from_spec(instr, instr.operands[i as usize - 1]).width() { - width / instr.mem_size - } else { - 0 - } - }; - write!(out, "{{1to{}}}", scale)?; - } + } else { + // vcvtpd2ps is "cool": in broadcast mode, it can read a + // double-precision float (qword), resize to single-precision, + // then broadcast that to the whole destination register. this + // means we need to show `xmm, qword [addr]{1to4}` if vector + // size is 256. 
likewise, scale of 8 for the same truncation + // reason if vector size is 512. + // vcvtudq2pd is the same story. + // vfpclassp{s,d} is a mystery to me. + if evex.vex().l() { + 4 + } else if evex.lp() { + 8 + } else { + 2 } } + } else { + // this should never be `None` - that would imply two + // memory operands for a broadcasted operation. + if let Some(width) = Operand::from_spec(instr, instr.operands[i as usize - 1]).width() { + width / instr.mem_size + } else { + 0 + } + }; + out.write_fixed_size("{1to")?; + static STRING_LUT: &'static [&'static str] = &[ + "0", "1", "2", "3", "4", "5", "6", "7", "8", + "9", "10", "11", "12", "13", "14", "15", "16", + ]; + unsafe { + out.write_lt_16(STRING_LUT.get_kinda_unchecked(scale as usize))?; } + out.write_char('}')?; } } } @@ -3784,7 +3782,7 @@ fn contextualize_intel(instr: &Instruction, colors: Ok(()) } -fn contextualize_c(instr: &Instruction, colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { +pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) -> fmt::Result { let mut brace_count = 0; let mut prefixed = false; @@ -3808,7 +3806,7 @@ fn contextualize_c(instr: &Instruction, colors: &Y, } if instr.prefixes.rep_any() { - if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&instr.opcode) { + if instr.opcode.can_rep() { let word_str = match instr.mem_size { 1 => "byte", 2 => "word", @@ -3830,21 +3828,29 @@ fn contextualize_c(instr: &Instruction, colors: &Y, } } - fn write_jmp_operand(op: Operand, colors: &Y, out: &mut T) -> fmt::Result { + fn write_jmp_operand(op: Operand, out: &mut T) -> fmt::Result { + let mut out = yaxpeax_arch::display::FmtSink::new(out); + use core::fmt::Write; match op { Operand::ImmediateI8(rel) => { - if rel >= 0 { - write!(out, "$+{}", colors.number(signed_i32_hex(rel as i32))) + let rel = if rel >= 0 { + out.write_str("$+")?; + rel as u8 } else { - write!(out, "${}", colors.number(signed_i32_hex(rel as 
i32))) - } + out.write_str("$-")?; + rel.unsigned_abs() + }; + out.write_prefixed_u8(rel) } Operand::ImmediateI32(rel) => { - if rel >= 0 { - write!(out, "$+{}", colors.number(signed_i32_hex(rel))) + let rel = if rel >= 0 { + out.write_str("$+")?; + rel as u32 } else { - write!(out, "${}", colors.number(signed_i32_hex(rel))) - } + out.write_str("$-")?; + rel.unsigned_abs() + }; + out.write_prefixed_u32(rel) } other => { write!(out, "{}", other) @@ -4007,87 +4013,87 @@ fn contextualize_c(instr: &Instruction, colors: &Y, } Opcode::JMP => { out.write_str("jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JECXZ => { out.write_str("if ecx == 0 then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::LOOP => { out.write_str("ecx--; if ecx != 0 then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::LOOPZ => { out.write_str("ecx--; if ecx != 0 and zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::LOOPNZ => { out.write_str("ecx--; if ecx != 0 and !zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JO => { out.write_str("if _(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNO => { out.write_str("if _(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JB => { out.write_str("if /* unsigned */ below(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNB => { out.write_str("if /* unsigned */ above_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, 
out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JZ => { out.write_str("if zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNZ => { out.write_str("if !zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNA => { out.write_str("if /* unsigned */ below_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JA => { out.write_str("if /* unsigned */ above(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JS => { out.write_str("if signed(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNS => { out.write_str("if !signed(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JP => { out.write_str("if parity(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNP => { out.write_str("if !parity(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JL => { out.write_str("if /* signed */ less(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JGE => { out.write_str("if /* signed */ greater_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JLE => { out.write_str("if /* signed */ less_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JG => { out.write_str("if /* signed 
*/ greater(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::NOP => { write!(out, "nop")?; @@ -4119,18 +4125,20 @@ fn contextualize_c(instr: &Instruction, colors: &Y, } impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual for InstructionDisplayer<'instr> { - fn contextualize(&self, colors: &Y, address: u32, context: Option<&NoContext>, out: &mut T) -> fmt::Result { + fn contextualize(&self, _colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { let InstructionDisplayer { instr, style, } = self; + let mut out = yaxpeax_arch::display::FmtSink::new(out); + match style { DisplayStyle::Intel => { - contextualize_intel(instr, colors, address, context, out) + contextualize_intel(instr, &mut out) } DisplayStyle::C => { - contextualize_c(instr, colors, address, context, out) + contextualize_c(instr, &mut out) } } } @@ -4196,3 +4204,150 @@ impl ShowContextual { + inst: &'a Instruction, + out: &'a mut F, +} + +impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> { + // return true if we printed a relative branch offset, false otherwise + type Ok = bool; + // but errors are errors + type Error = fmt::Error; + + fn visit_reg(&mut self, _reg: RegSpec) -> Result { + Ok(false) + } + fn visit_deref(&mut self, _reg: RegSpec) -> Result { + Ok(false) + } + fn visit_disp(&mut self, _reg: RegSpec, _disp: i32) -> Result { + Ok(false) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i8(&mut self, rel: i8) -> Result { + if RELATIVE_BRANCHES.contains(&self.inst.opcode) { + self.out.write_char('$')?; + // danger_anguished_string_write(self.out, "$"); + let mut v = rel as u8; + if rel < 0 { + self.out.write_char('-')?; + //danger_anguished_string_write(&mut self.out, "-"); + v = -rel as u8; + } else { + self.out.write_char('+')?; + // danger_anguished_string_write(&mut self.out, "+"); + } + self.out.write_fixed_size("0x")?; + 
self.out.write_u8(v)?; + Ok(true) + } else { + Ok(false) + } + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i32(&mut self, rel: i32) -> Result { + if RELATIVE_BRANCHES.contains(&self.inst.opcode) || self.inst.opcode == Opcode::XBEGIN { + self.out.write_char('$')?; + // danger_anguished_string_write(self.out, "$"); + let mut v = rel as u32; + if rel < 0 { + self.out.write_char('-')?; + // danger_anguished_string_write(&mut self.out, "-"); + v = -rel as u32; + } else { + self.out.write_char('+')?; + // danger_anguished_string_write(&mut self.out, "+"); + } + self.out.write_fixed_size("0x")?; + self.out.write_u32(v)?; + Ok(true) + } else { + Ok(false) + } + } + fn visit_u8(&mut self, _imm: u8) -> Result { + Ok(false) + } + fn visit_i16(&mut self, _imm: i16) -> Result { + Ok(false) + } + fn visit_u16(&mut self, _imm: u16) -> Result { + Ok(false) + } + fn visit_u32(&mut self, _imm: u32) -> Result { + Ok(false) + } + fn visit_abs_u16(&mut self, _imm: u16) -> Result { + Ok(false) + } + fn visit_abs_u32(&mut self, _imm: u32) -> Result { + Ok(false) + } + fn visit_reg_scale(&mut self, _reg: RegSpec, _scale: u8) -> Result { + Ok(false) + } + fn visit_index_base_scale(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8) -> Result { + Ok(false) + } + fn visit_reg_scale_disp(&mut self, _reg: RegSpec, _scale: u8, _disp: i32) -> Result { + Ok(false) + } + fn visit_index_base_scale_disp(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32) -> Result { + Ok(false) + } + fn visit_other(&mut self) -> Result { + Ok(false) + } + fn visit_reg_mask_merge(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode) -> Result { + Ok(false) + } + fn visit_reg_mask_merge_sae(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode, _sae_mode: super::SaeMode) -> Result { + Ok(false) + } + fn visit_reg_mask_merge_sae_noround(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode) -> Result { + Ok(false) + } + fn 
visit_reg_disp_masked(&mut self, _spec: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_reg_deref_masked(&mut self, _spec: RegSpec, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_reg_scale_masked(&mut self, _spec: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_reg_scale_disp_masked(&mut self, _spec: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_masked(&mut self, _base: RegSpec, _index: RegSpec, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_scale_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_scale_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_absolute_far_address(&mut self, _segment: u16, _address: u32) -> Result { + Ok(false) + } +} diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 7c7a4c0..74522d9 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -4332,6 +4332,171 @@ impl Instruction { Operand::from_spec(self, self.operands[i as usize]) } + /// TODO: make public, document, etc... + /// + /// `visit_operand` allows code using operands to better specialize and inline with the logic + /// that would construct an [`Operand`] variant, without having to necessarily construct an + /// `Operand` (including the attendant move of the enum). + /// + /// if the work you expect to do per-operand is very small, constructing an `Operand` and + /// dispatching on tags may be a substantial factor of overall runtime. `visit_operand` can + /// reduce total overhead in such cases. 
+ #[cfg_attr(features="profiling", inline(never))] + fn visit_operand(&self, i: u8, visitor: &mut T) -> Result { + assert!(i < 4); + let spec = self.operands[i as usize]; + match spec { + OperandSpec::Nothing => { + visitor.visit_other() + } + OperandSpec::RegRRR => { + visitor.visit_reg(self.regs[0]) + } + OperandSpec::RegMMM => { + visitor.visit_reg(self.regs[1]) + } + OperandSpec::RegVex => { + visitor.visit_reg(self.regs[3]) + } + OperandSpec::Reg4 => { + visitor.visit_reg(RegSpec { num: self.imm as u8, bank: self.regs[3].bank }) + } + OperandSpec::Deref => { + visitor.visit_deref(self.regs[1]) + } + OperandSpec::Deref_si => { +// visitor.visit_other() + visitor.visit_deref(RegSpec::si()) + } + OperandSpec::Deref_di => { +// visitor.visit_other() + visitor.visit_deref(RegSpec::di()) + } + OperandSpec::Deref_esi => { +// visitor.visit_other() + visitor.visit_deref(RegSpec::esi()) + } + OperandSpec::Deref_edi => { +// visitor.visit_other() + visitor.visit_deref(RegSpec::edi()) + } + OperandSpec::RegDisp => { + visitor.visit_disp(self.regs[1], self.disp as i32) + } + OperandSpec::RegRRR_maskmerge => { + visitor.visit_reg_mask_merge( + self.regs[0], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::RegRRR_maskmerge_sae => { + visitor.visit_reg_mask_merge_sae( + self.regs[0], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + SaeMode::from(self.prefixes.evex_unchecked().vex().l(), self.prefixes.evex_unchecked().lp()), + ) + } + OperandSpec::RegRRR_maskmerge_sae_noround => { + visitor.visit_reg_mask_merge_sae_noround( + self.regs[0], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::RegMMM_maskmerge => { + visitor.visit_reg_mask_merge( + self.regs[1], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + 
MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::RegMMM_maskmerge_sae_noround => { + visitor.visit_reg_mask_merge_sae_noround( + self.regs[1], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::RegVex_maskmerge => { + visitor.visit_reg_mask_merge( + self.regs[3], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::ImmI8 => visitor.visit_i8(self.imm as i8), + OperandSpec::ImmU8 => visitor.visit_u8(self.imm as u8), + OperandSpec::ImmI16 => visitor.visit_i16(self.imm as i16), + OperandSpec::ImmU16 => visitor.visit_u16(self.imm as u16), + OperandSpec::ImmI32 => visitor.visit_i32(self.imm as i32), + OperandSpec::ImmInDispField => visitor.visit_u16(self.disp as u16), + OperandSpec::DispU16 => visitor.visit_abs_u16(self.disp as u16), + OperandSpec::DispU32 => visitor.visit_abs_u32(self.disp as u32), + OperandSpec::RegScale => { + visitor.visit_reg_scale(self.regs[2], self.scale) + } + OperandSpec::RegScaleDisp => { + visitor.visit_reg_scale_disp(self.regs[2], self.scale, self.disp as i32) + } + OperandSpec::RegIndexBaseScale => { + visitor.visit_index_base_scale(self.regs[1], self.regs[2], self.scale) + /* + Operand::RegIndexBaseScale(self.regs[1], self.regs[2], self.scale) + */ + } + OperandSpec::RegIndexBaseScaleDisp => { + visitor.visit_index_base_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) + } + OperandSpec::Deref_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_reg_deref_masked(self.regs[1], RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_deref(self.regs[1]) + } + } + OperandSpec::RegDisp_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_reg_disp_masked(self.regs[1], self.disp as i32, 
RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_disp(self.regs[1], self.disp as i32) + } + } + OperandSpec::RegScale_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_reg_scale_masked(self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_reg_scale(self.regs[2], self.scale) + } + } + OperandSpec::RegScaleDisp_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_reg_scale_disp_masked(self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_reg_scale_disp(self.regs[2], self.scale, self.disp as i32) + } + } + OperandSpec::RegIndexBaseScale_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_index_base_scale_masked(self.regs[1], self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_index_base_scale(self.regs[1], self.regs[2], self.scale) + } + } + OperandSpec::RegIndexBaseScaleDisp_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_index_base_scale_disp_masked(self.regs[1], self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_index_base_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) + } + } + OperandSpec::AbsoluteFarAddress => { + visitor.visit_absolute_far_address(self.disp as u16, self.imm as u32) + } + } + } + /// get the number of operands in this instruction. useful in iterating an instruction's /// operands generically. 
pub fn operand_count(&self) -> u8 { From 22528447297ef81b65e91fae68fbcd6010065505 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 13:07:51 -0700 Subject: [PATCH 70/95] adapt OperandVisitor and related to real_mode --- src/real_mode/mod.rs | 250 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 248 insertions(+), 2 deletions(-) diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index 8dd6741..b1aaf7d 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -450,14 +450,14 @@ impl SaeMode { /// a human-friendly label for this `SaeMode`: /// /// ``` - /// use yaxpeax_x86::long_mode::SaeMode; + /// use yaxpeax_x86::real_mode::SaeMode; /// /// assert_eq!(SaeMode::RoundNearest.label(), "{rne-sae}"); /// assert_eq!(SaeMode::RoundDown.label(), "{rd-sae}"); /// assert_eq!(SaeMode::RoundUp.label(), "{ru-sae}"); /// assert_eq!(SaeMode::RoundZero.label(), "{rz-sae}"); /// ``` - pub fn label(&self) -> &'static str { + pub const fn label(&self) -> &'static str { match self { SaeMode::RoundNearest => "{rne-sae}", SaeMode::RoundDown => "{rd-sae}", @@ -477,6 +477,42 @@ impl SaeMode { SAE_MODES[idx] } } + +pub trait OperandVisitor { + type Ok; + type Error; + + fn visit_reg(&mut self, reg: RegSpec) -> Result; + fn visit_deref(&mut self, reg: RegSpec) -> Result; + fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result; + fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result; + fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result; + fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result; + fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result; + fn visit_i8(&mut self, imm: i8) -> Result; + fn visit_u8(&mut self, imm: u8) -> Result; + fn visit_i16(&mut self, imm: i16) -> Result; + fn visit_u16(&mut self, imm: u16) -> Result; + fn visit_i32(&mut self, imm: i32) -> Result; + fn visit_u32(&mut self, imm: u32) -> Result; + fn 
visit_abs_u16(&mut self, imm: u16) -> Result; + fn visit_abs_u32(&mut self, imm: u32) -> Result; + fn visit_reg_mask_merge(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result; + fn visit_reg_mask_merge_sae(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: SaeMode) -> Result; + fn visit_reg_mask_merge_sae_noround(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result; + fn visit_reg_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_reg_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result; + fn visit_reg_scale_masked(&mut self, base: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; + fn visit_reg_scale_disp_masked(&mut self, base: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result; + fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; + fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_absolute_far_address(&mut self, segment: u16, address: u32) -> Result; + + fn visit_other(&mut self) -> Result; +} + impl Operand { fn from_spec(inst: &Instruction, spec: OperandSpec) -> Operand { match spec { @@ -700,6 +736,51 @@ impl Operand { } } } + + /// provided for parity with [`Instruction::visit_operand`]. this has little utility other than + /// to reuse an `OperandVisitor` on an `Operand` directly. 
+ pub fn visit(&self, visitor: &mut T) -> Result { + match self { + Operand::Nothing => { + visitor.visit_other() + } + Operand::Register(reg) => { + visitor.visit_reg(*reg) + } + Operand::RegDeref(reg) => { + visitor.visit_deref(*reg) + } + Operand::RegDisp(reg, disp) => { + visitor.visit_disp(*reg, *disp) + } + Operand::ImmediateI8(imm) => visitor.visit_i8(*imm), + Operand::ImmediateU8(imm) => visitor.visit_u8(*imm), + Operand::ImmediateI16(imm) => visitor.visit_i16(*imm), + Operand::ImmediateU16(imm) => visitor.visit_u16(*imm), + Operand::ImmediateI32(imm) => visitor.visit_i32(*imm), + Operand::ImmediateU32(imm) => visitor.visit_u32(*imm), + Operand::DisplacementU16(disp) => visitor.visit_abs_u16(*disp), + Operand::DisplacementU32(disp) => visitor.visit_abs_u32(*disp), + Operand::RegScale(reg, scale) => visitor.visit_reg_scale(*reg, *scale), + Operand::RegScaleDisp(reg, scale, disp) => visitor.visit_reg_scale_disp(*reg, *scale, *disp), + Operand::RegIndexBase(_, _) => { /* not actually reachable anymore */ visitor.visit_other() }, + Operand::RegIndexBaseDisp(_, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, + Operand::RegIndexBaseScale(base, index, scale) => visitor.visit_index_base_scale(*base, *index, *scale), + Operand::RegIndexBaseScaleDisp(base, index, scale, disp) => visitor.visit_index_base_scale_disp(*base, *index, *scale, *disp), + Operand::RegisterMaskMerge(reg, mask, merge) => visitor.visit_reg_mask_merge(*reg, *mask, *merge), + Operand::RegisterMaskMergeSae(reg, mask, merge, sae) => visitor.visit_reg_mask_merge_sae(*reg, *mask, *merge, *sae), + Operand::RegisterMaskMergeSaeNoround(reg, mask, merge) => visitor.visit_reg_mask_merge_sae_noround(*reg, *mask, *merge), + Operand::RegDerefMasked(reg, mask) => visitor.visit_reg_deref_masked(*reg, *mask), + Operand::RegDispMasked(reg, disp, mask) => visitor.visit_reg_disp_masked(*reg, *disp, *mask), + Operand::RegScaleMasked(reg, scale, mask) => visitor.visit_reg_scale_masked(*reg, 
*scale, *mask), + Operand::RegIndexBaseMasked(_, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, + Operand::RegIndexBaseDispMasked(_, _, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, + Operand::RegScaleDispMasked(base, scale, disp, mask) => visitor.visit_reg_scale_disp_masked(*base, *scale, *disp, *mask), + Operand::RegIndexBaseScaleMasked(base, index, scale, mask) => visitor.visit_index_base_scale_masked(*base, *index, *scale, *mask), + Operand::RegIndexBaseScaleDispMasked(base, index, scale, disp, mask) => visitor.visit_index_base_scale_disp_masked(*base, *index, *scale, *disp, *mask), + Operand::AbsoluteFarAddress { segment, address } => visitor.visit_absolute_far_address(*segment, *address), + } + } } #[test] @@ -4251,6 +4332,171 @@ impl Instruction { Operand::from_spec(self, self.operands[i as usize]) } + /// TODO: make public, document, etc... + /// + /// `visit_operand` allows code using operands to better specialize and inline with the logic + /// that would construct an [`Operand`] variant, without having to necessarily construct an + /// `Operand` (including the attendant move of the enum). + /// + /// if the work you expect to do per-operand is very small, constructing an `Operand` and + /// dispatching on tags may be a substantial factor of overall runtime. `visit_operand` can + /// reduce total overhead in such cases. 
+ #[cfg_attr(features="profiling", inline(never))] + fn visit_operand(&self, i: u8, visitor: &mut T) -> Result { + assert!(i < 4); + let spec = self.operands[i as usize]; + match spec { + OperandSpec::Nothing => { + visitor.visit_other() + } + OperandSpec::RegRRR => { + visitor.visit_reg(self.regs[0]) + } + OperandSpec::RegMMM => { + visitor.visit_reg(self.regs[1]) + } + OperandSpec::RegVex => { + visitor.visit_reg(self.regs[3]) + } + OperandSpec::Reg4 => { + visitor.visit_reg(RegSpec { num: self.imm as u8, bank: self.regs[3].bank }) + } + OperandSpec::Deref => { + visitor.visit_deref(self.regs[1]) + } + OperandSpec::Deref_si => { +// visitor.visit_other() + visitor.visit_deref(RegSpec::si()) + } + OperandSpec::Deref_di => { +// visitor.visit_other() + visitor.visit_deref(RegSpec::di()) + } + OperandSpec::Deref_esi => { +// visitor.visit_other() + visitor.visit_deref(RegSpec::esi()) + } + OperandSpec::Deref_edi => { +// visitor.visit_other() + visitor.visit_deref(RegSpec::edi()) + } + OperandSpec::RegDisp => { + visitor.visit_disp(self.regs[1], self.disp as i32) + } + OperandSpec::RegRRR_maskmerge => { + visitor.visit_reg_mask_merge( + self.regs[0], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::RegRRR_maskmerge_sae => { + visitor.visit_reg_mask_merge_sae( + self.regs[0], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + SaeMode::from(self.prefixes.evex_unchecked().vex().l(), self.prefixes.evex_unchecked().lp()), + ) + } + OperandSpec::RegRRR_maskmerge_sae_noround => { + visitor.visit_reg_mask_merge_sae_noround( + self.regs[0], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::RegMMM_maskmerge => { + visitor.visit_reg_mask_merge( + self.regs[1], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + 
MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::RegMMM_maskmerge_sae_noround => { + visitor.visit_reg_mask_merge_sae_noround( + self.regs[1], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::RegVex_maskmerge => { + visitor.visit_reg_mask_merge( + self.regs[3], + RegSpec::mask(self.prefixes.evex_unchecked().mask_reg()), + MergeMode::from(self.prefixes.evex_unchecked().merge()), + ) + } + OperandSpec::ImmI8 => visitor.visit_i8(self.imm as i8), + OperandSpec::ImmU8 => visitor.visit_u8(self.imm as u8), + OperandSpec::ImmI16 => visitor.visit_i16(self.imm as i16), + OperandSpec::ImmU16 => visitor.visit_u16(self.imm as u16), + OperandSpec::ImmI32 => visitor.visit_i32(self.imm as i32), + OperandSpec::ImmInDispField => visitor.visit_u16(self.disp as u16), + OperandSpec::DispU16 => visitor.visit_abs_u16(self.disp as u16), + OperandSpec::DispU32 => visitor.visit_abs_u32(self.disp as u32), + OperandSpec::RegScale => { + visitor.visit_reg_scale(self.regs[2], self.scale) + } + OperandSpec::RegScaleDisp => { + visitor.visit_reg_scale_disp(self.regs[2], self.scale, self.disp as i32) + } + OperandSpec::RegIndexBaseScale => { + visitor.visit_index_base_scale(self.regs[1], self.regs[2], self.scale) + /* + Operand::RegIndexBaseScale(self.regs[1], self.regs[2], self.scale) + */ + } + OperandSpec::RegIndexBaseScaleDisp => { + visitor.visit_index_base_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) + } + OperandSpec::Deref_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_reg_deref_masked(self.regs[1], RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_deref(self.regs[1]) + } + } + OperandSpec::RegDisp_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_reg_disp_masked(self.regs[1], self.disp as i32, 
RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_disp(self.regs[1], self.disp as i32) + } + } + OperandSpec::RegScale_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_reg_scale_masked(self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_reg_scale(self.regs[2], self.scale) + } + } + OperandSpec::RegScaleDisp_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_reg_scale_disp_masked(self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_reg_scale_disp(self.regs[2], self.scale, self.disp as i32) + } + } + OperandSpec::RegIndexBaseScale_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_index_base_scale_masked(self.regs[1], self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_index_base_scale(self.regs[1], self.regs[2], self.scale) + } + } + OperandSpec::RegIndexBaseScaleDisp_mask => { + if self.prefixes.evex_unchecked().mask_reg() != 0 { + visitor.visit_index_base_scale_disp_masked(self.regs[1], self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + } else { + visitor.visit_index_base_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) + } + } + OperandSpec::AbsoluteFarAddress => { + visitor.visit_absolute_far_address(self.disp as u16, self.imm as u32) + } + } + } + /// get the number of operands in this instruction. useful in iterating an instruction's /// operands generically. pub fn operand_count(&self) -> u8 { From 949aa2eef7259ae6eac1ec78ae5936d2ee05e8bc Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 13:08:11 -0700 Subject: [PATCH 71/95] fix inlining attributes re. 
profiling flag in protected_mode --- src/protected_mode/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 74522d9..f2b10f5 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -5965,7 +5965,7 @@ const OPCODES: [OpcodeRecord; 256] = [ #[allow(non_snake_case)] #[cfg_attr(feature="profiling", inline(never))] -#[cfg_attr(not(feature="profiling"), inline(never))] +#[cfg_attr(not(feature="profiling"), inline(always))] pub(self) fn read_E< T: Reader<::Address, ::Word>, S: DescriptionSink, @@ -6755,7 +6755,7 @@ fn read_opc_hotpath< } #[cfg_attr(feature="profiling", inline(never))] -#[cfg_attr(not(feature="profiling"), inline(never))] +#[cfg_attr(not(feature="profiling"), inline(always))] fn read_with_annotations< T: Reader<::Address, ::Word>, S: DescriptionSink, @@ -6917,7 +6917,7 @@ fn read_with_annotations< } #[cfg_attr(feature="profiling", inline(never))] -#[cfg_attr(not(feature="profiling"), inline(never))] +#[cfg_attr(not(feature="profiling"), inline(always))] fn read_operands< T: Reader<::Address, ::Word>, S: DescriptionSink From f70232d2bc7703eb062820be8896daf06d94d032 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 13:15:50 -0700 Subject: [PATCH 72/95] normalize imports, pull safer_unchecked from yaxpeax-arch --- src/long_mode/display.rs | 2 +- src/protected_mode/display.rs | 3 +-- src/real_mode/display.rs | 5 +++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 869ac0b..4ffc431 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -2,11 +2,11 @@ use core::fmt; use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; -use crate::safer_unchecked::GetSaferUnchecked as _; use crate::MEM_SIZE_STRINGS; use crate::long_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixRex, OperandSpec}; use 
yaxpeax_arch::display::DisplaySink; +use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; trait DisplaySinkExt { // `write_opcode` depends on all mnemonics being less than 32 bytes long. check that here, at diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 46bb185..2497052 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -1,13 +1,12 @@ use core::fmt; -use crate::safer_unchecked::GetSaferUnchecked as _; use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; -use yaxpeax_arch::display::*; use crate::MEM_SIZE_STRINGS; use crate::protected_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixVex, OperandSpec}; use yaxpeax_arch::display::DisplaySink; +use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; trait DisplaySinkExt { // `write_opcode` depends on all mnemonics being less than 32 bytes long. check that here, at diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index b06e3d2..3e3f524 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -1,12 +1,13 @@ use core::fmt; use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; -use yaxpeax_arch::display::*; -use crate::safer_unchecked::GetSaferUnchecked as _; use crate::MEM_SIZE_STRINGS; use crate::real_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixVex, OperandSpec}; +use yaxpeax_arch::display::DisplaySink; +use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; + impl fmt::Display for InstDecoder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { if self == &InstDecoder::default() { From dc500de235361284017e917f28d09c3304973b9c Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 13:16:06 -0700 Subject: [PATCH 73/95] adapt protected-mode display to real mode --- src/real_mode/display.rs | 1111 ++++++++++++++++++++++++++------------ 1 file changed, 764 insertions(+), 347 deletions(-) diff --git 
a/src/real_mode/display.rs b/src/real_mode/display.rs index 3e3f524..f574499 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -8,6 +8,102 @@ use crate::real_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruc use yaxpeax_arch::display::DisplaySink; use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; +trait DisplaySinkExt { + // `write_opcode` depends on all mnemonics being less than 32 bytes long. check that here, at + // compile time. referenced later to force evaluation of this const. + const MNEMONIC_LT_32: () = { + let mut i = 0; + while i < MNEMONICS.len() { + let name = &MNEMONICS[i]; + if name.len() >= 32 { + panic!("mnemonic too long"); + } + i += 1; + } + }; + + // `write_reg` depends on all register names being less than 8 bytes long. check that here, at + // compile time. referenced later to force evaluation of this const. + const REG_LABEL_LT_8: () = { + let mut i = 0; + while i < REG_NAMES.len() { + let name = &REG_NAMES[i]; + if name.len() >= 8 { + panic!("register name too long"); + } + i += 1; + } + }; + + // `write_mem_size_label` depends on all memory size labels being less than 8 bytes long. check + // that here, at compile time. referenced later to force evaluation of this const. + const MEM_SIZE_LABEL_LT_8: () = { + let mut i = 0; + while i < crate::MEM_SIZE_STRINGS.len() { + let name = &MEM_SIZE_STRINGS[i]; + if name.len() >= 8 { + panic!("memory label name too long"); + } + i += 1; + } + }; + + // `write_sae_mode` depends on all sae mode labels being less than 16 bytes long. check that + // here, at compile time. referenced later to force evaluation of this const. 
+ const SAE_LABEL_LT_16: () = { + let mut i = 0; + while i < super::SAE_MODES.len() { + let mode = &super::SAE_MODES[i]; + if mode.label().len() >= 16 { + panic!("sae mode label too long"); + } + i += 1; + } + }; + + fn write_opcode(&mut self, opcode: super::Opcode) -> Result<(), core::fmt::Error>; + fn write_reg(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error>; + fn write_mem_size_label(&mut self, mem_size: u8) -> Result<(), core::fmt::Error>; + fn write_sae_mode(&mut self, sae: super::SaeMode) -> Result<(), core::fmt::Error>; +} + +impl DisplaySinkExt for T { + #[inline(always)] + fn write_opcode(&mut self, opcode: super::Opcode) -> Result<(), core::fmt::Error> { + let name = opcode.name(); + + let _ = Self::MNEMONIC_LT_32; + // Safety: all opcode mnemonics are 31 bytes or fewer. + unsafe { self.write_lt_32(name) } + } + + #[inline(always)] + fn write_reg(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error> { + let label = regspec_label(&reg); + + let _ = Self::REG_LABEL_LT_8; + // Safety: all register labels are 7 bytes or fewer. + unsafe { self.write_lt_8(label) } + } + + #[inline(always)] + fn write_mem_size_label(&mut self, mem_size: u8) -> Result<(), core::fmt::Error> { + let label = mem_size_label(mem_size); + let _ = Self::MEM_SIZE_LABEL_LT_8; + // Safety: all memory size labels are 7 bytes or fewer + unsafe { self.write_lt_8(label) } + } + + #[inline(always)] + fn write_sae_mode(&mut self, sae_mode: super::SaeMode) -> Result<(), core::fmt::Error> { + let label = sae_mode.label(); + + let _ = Self::SAE_LABEL_LT_16; + // Safety: all sae labels are 15 bytes or fewer. 
+ unsafe { self.write_lt_16(label) } + } +} + impl fmt::Display for InstDecoder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { if self == &InstDecoder::default() { @@ -90,6 +186,19 @@ impl fmt::Display for PrefixVex { } } +impl Segment { + fn name(&self) -> &'static [u8; 2] { + match self { + Segment::CS => b"cs", + Segment::DS => b"ds", + Segment::ES => b"es", + Segment::FS => b"fs", + Segment::GS => b"gs", + Segment::SS => b"ss", + } + } +} + impl fmt::Display for Segment { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -130,6 +239,10 @@ pub(crate) fn regspec_label(spec: &RegSpec) -> &'static str { unsafe { REG_NAMES.get_kinda_unchecked((spec.num as u16 + ((spec.bank as u16) << 3)) as usize) } } +pub(crate) fn mem_size_label(size: u8) -> &'static str { + unsafe { MEM_SIZE_STRINGS.get_kinda_unchecked(size as usize) } +} + impl fmt::Display for RegSpec { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(regspec_label(self)) @@ -144,201 +257,363 @@ impl fmt::Display for Operand { impl Colorize for Operand { fn colorize(&self, colors: &Y, f: &mut T) -> fmt::Result { - match self { - &Operand::ImmediateU8(imm) => { - write!(f, "{}", colors.number(u8_hex(imm))) - } - &Operand::ImmediateI8(imm) => { - write!(f, "{}", - colors.number(signed_i8_hex(imm))) - }, - &Operand::ImmediateU16(imm) => { - write!(f, "{}", colors.number(u16_hex(imm))) - } - &Operand::ImmediateI16(imm) => { - write!(f, "{}", - colors.number(signed_i16_hex(imm))) - }, - &Operand::ImmediateU32(imm) => { - write!(f, "{}", colors.number(u32_hex(imm))) - } - &Operand::ImmediateI32(imm) => { - write!(f, "{}", - colors.number(signed_i32_hex(imm))) - }, - &Operand::AbsoluteFarAddress { segment, address } => { - write!(f, "{}:{}", - colors.number(u16_hex(segment as u16)), - colors.number(u32_hex(address as u32)), - ) - }, - &Operand::Register(ref spec) => { - f.write_str(regspec_label(spec)) - } - &Operand::RegisterMaskMerge(ref spec, ref mask, merge_mode) => 
{ - f.write_str(regspec_label(spec))?; - if mask.num != 0 { - f.write_str("{")?; - f.write_str(regspec_label(mask))?; - f.write_str("}")?; - } - if let MergeMode::Zero = merge_mode { - f.write_str("{z}")?; - } - Ok(()) - } - &Operand::RegisterMaskMergeSae(ref spec, ref mask, merge_mode, sae_mode) => { - f.write_str(regspec_label(spec))?; - if mask.num != 0 { - f.write_str("{")?; - f.write_str(regspec_label(mask))?; - f.write_str("}")?; - } - if let MergeMode::Zero = merge_mode { - f.write_str("{z}")?; - } - f.write_str(sae_mode.label())?; - Ok(()) - } - &Operand::RegisterMaskMergeSaeNoround(ref spec, ref mask, merge_mode) => { - f.write_str(regspec_label(spec))?; - if mask.num != 0 { - f.write_str("{")?; - f.write_str(regspec_label(mask))?; - f.write_str("}")?; - } - if let MergeMode::Zero = merge_mode { - f.write_str("{z}")?; - } - f.write_str("{sae}")?; - Ok(()) - } - &Operand::DisplacementU16(imm) => { - write!(f, "[{}]", colors.address(u16_hex(imm))) - } - &Operand::DisplacementU32(imm) => { - write!(f, "[{}]", colors.address(u32_hex(imm))) - } - &Operand::RegDisp(ref spec, disp) => { - write!(f, "[{} ", regspec_label(spec))?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]") - }, - &Operand::RegDeref(ref spec) => { - f.write_str("[")?; - f.write_str(regspec_label(spec))?; - f.write_str("]") - }, - &Operand::RegScale(ref spec, scale) => { - write!(f, "[{} * {}]", - regspec_label(spec), - colors.number(scale) - ) - }, - &Operand::RegScaleDisp(ref spec, scale, disp) => { - write!(f, "[{} * {} ", - regspec_label(spec), - colors.number(scale), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]") - }, - &Operand::RegIndexBase(ref base, ref index) => { - f.write_str("[")?; - f.write_str(regspec_label(base))?; - f.write_str(" + ")?; - f.write_str(regspec_label(index))?; - f.write_str("]") - } - &Operand::RegIndexBaseDisp(ref base, ref index, disp) => { - write!(f, "[{} + 
{} ", - regspec_label(base), - regspec_label(index), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]") - }, - &Operand::RegIndexBaseScale(ref base, ref index, scale) => { - write!(f, "[{} + {} * {}]", - regspec_label(base), - regspec_label(index), - colors.number(scale) - ) + let mut f = yaxpeax_arch::display::FmtSink::new(f); + let mut visitor = DisplayingOperandVisitor { + f: &mut f + }; + self.visit(&mut visitor) + } +} + +struct DisplayingOperandVisitor<'a, T> { + f: &'a mut T, +} + +impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> { + type Ok = (); + type Error = core::fmt::Error; + + #[cfg_attr(feature="profiling", inline(never))] + fn visit_u8(&mut self, imm: u8) -> Result { + self.f.span_start_immediate(); + self.f.write_fixed_size("0x")?; + self.f.write_u8(imm)?; + self.f.span_end_immediate(); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i8(&mut self, imm: i8) -> Result { + self.f.span_start_immediate(); + let mut v = imm as u8; + if imm < 0 { + self.f.write_char('-')?; + v = -imm as u8; + } + self.f.write_fixed_size("0x")?; + self.f.write_u8(v)?; + self.f.span_end_immediate(); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_u16(&mut self, imm: u16) -> Result { + self.f.span_start_immediate(); + self.f.write_fixed_size("0x")?; + self.f.write_u16(imm)?; + self.f.span_end_immediate(); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i16(&mut self, imm: i16) -> Result { + self.f.span_start_immediate(); + let mut v = imm as u16; + if imm < 0 { + self.f.write_char('-')?; + v = -imm as u16; + } + self.f.write_fixed_size("0x")?; + self.f.write_u16(v)?; + self.f.span_end_immediate(); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_u32(&mut self, imm: u32) -> Result { + self.f.span_start_immediate(); + self.f.write_fixed_size("0x")?; + self.f.write_u32(imm)?; + self.f.span_end_immediate(); + 
Ok(()) + } + fn visit_i32(&mut self, imm: i32) -> Result { + self.f.span_start_immediate(); + let mut v = imm as u32; + if imm < 0 { + self.f.write_char('-')?; + v = -imm as u32; + } + self.f.write_fixed_size("0x")?; + self.f.write_u32(v)?; + self.f.span_end_immediate(); + Ok(()) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_reg(&mut self, reg: RegSpec) -> Result { + self.f.span_start_register(); + self.f.write_reg(reg)?; + self.f.span_end_register(); + Ok(()) + } + fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { + self.f.span_start_register(); + self.f.write_reg(spec)?; + self.f.span_end_register(); + if mask.num != 0 { + self.f.write_fixed_size("{")?; + self.f.span_start_register(); + self.f.write_reg(mask)?; + self.f.span_end_register(); + self.f.write_fixed_size("}")?; + } + if let MergeMode::Zero = merge_mode { + self.f.write_fixed_size("{z}")?; + } + Ok(()) + } + fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: super::SaeMode) -> Result { + self.f.write_reg(spec)?; + if mask.num != 0 { + self.f.write_fixed_size("{")?; + self.f.write_reg(mask)?; + self.f.write_fixed_size("}")?; + } + if let MergeMode::Zero = merge_mode { + self.f.write_fixed_size("{z}")?; + } + self.f.write_sae_mode(sae_mode)?; + Ok(()) + } + fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { + self.f.write_reg(spec)?; + if mask.num != 0 { + self.f.write_fixed_size("{")?; + self.f.write_reg(mask)?; + self.f.write_fixed_size("}")?; + } + if let MergeMode::Zero = merge_mode { + self.f.write_fixed_size("{z}")?; + } + self.f.write_fixed_size("{sae}")?; + Ok(()) + } + fn visit_abs_u16(&mut self, imm: u16) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_fixed_size("0x")?; + self.f.write_u16(imm)?; + self.f.write_fixed_size("]")?; + Ok(()) + } + fn visit_abs_u32(&mut self, imm: u32) -> Result { + 
self.f.write_fixed_size("[")?; + self.f.write_fixed_size("0x")?; + self.f.write_u32(imm)?; + self.f.write_fixed_size("]")?; + Ok(()) + } + #[cfg_attr(not(feature="profiling"), inline(always))] + #[cfg_attr(feature="profiling", inline(never))] + fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { + self.f.write_char('[')?; + self.f.write_reg(reg)?; + self.f.write_fixed_size(" ")?; + + { + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; } - &Operand::RegIndexBaseScaleDisp(ref base, ref index, scale, disp) => { - write!(f, "[{} + {} * {} ", - regspec_label(base), - regspec_label(index), - colors.number(scale), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]") - }, - &Operand::RegDispMasked(ref spec, disp, ref mask_reg) => { - write!(f, "[{} ", regspec_label(spec))?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegDerefMasked(ref spec, ref mask_reg) => { - f.write_str("[")?; - f.write_str(regspec_label(spec))?; - f.write_str("]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegScaleMasked(ref spec, scale, ref mask_reg) => { - write!(f, "[{} * {}]", - regspec_label(spec), - colors.number(scale) - )?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegScaleDispMasked(ref spec, scale, disp, ref mask_reg) => { - write!(f, "[{} * {} ", - regspec_label(spec), - colors.number(scale), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegIndexBaseMasked(ref base, ref index, ref mask_reg) => { - f.write_str("[")?; - f.write_str(regspec_label(base))?; - f.write_str(" + ")?; - f.write_str(regspec_label(index))?; - f.write_str("]")?; - write!(f, "{{{}}}", 
regspec_label(mask_reg)) + self.f.write_u32(v)?; + } + self.f.write_fixed_size("]") + } + fn visit_deref(&mut self, reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(reg)?; + self.f.write_fixed_size("]") + } + fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(reg)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size("]")?; + + Ok(()) + } + fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(reg)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size(" ")?; + + { + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; } - &Operand::RegIndexBaseDispMasked(ref base, ref index, disp, ref mask_reg) => { - write!(f, "[{} + {} ", - regspec_label(base), - regspec_label(index), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::RegIndexBaseScaleMasked(ref base, ref index, scale, ref mask_reg) => { - write!(f, "[{} + {} * {}]", - regspec_label(base), - regspec_label(index), - colors.number(scale) - )?; - write!(f, "{{{}}}", regspec_label(mask_reg)) + self.f.write_u32(v)?; + } + self.f.write_char(']') + } + fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(base)?; + self.f.write_fixed_size(" + ")?; + self.f.write_reg(index)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size("]") + } + fn 
visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(base)?; + self.f.write_fixed_size(" + ")?; + self.f.write_reg(index)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size(" ")?; + + { + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; } - &Operand::RegIndexBaseScaleDispMasked(ref base, ref index, scale, disp, ref mask_reg) => { - write!(f, "[{} + {} * {} ", - regspec_label(base), - regspec_label(index), - colors.number(scale), - )?; - format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; - write!(f, "]")?; - write!(f, "{{{}}}", regspec_label(mask_reg)) - }, - &Operand::Nothing => { Ok(()) }, + self.f.write_u32(v)?; } + self.f.write_fixed_size("]") + } + fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + self.f.write_char('[')?; + self.f.write_reg(spec)?; + self.f.write_char(' ')?; + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; + } + self.f.write_u32(v)?; + self.f.write_char(']')?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(spec)?; + self.f.write_fixed_size("]")?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(spec)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 
to '1', scale=2 to '2', etc + self.f.write_fixed_size("]")?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(spec)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size(" ")?; + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; + } + self.f.write_u32(v)?; + self.f.write_char(']')?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(base)?; + self.f.write_fixed_size(" + ")?; + self.f.write_reg(index)?; + self.f.write_fixed_size("]")?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(base)?; + self.f.write_fixed_size(" + ")?; + self.f.write_reg(index)?; + self.f.write_fixed_size(" ")?; + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; + } + self.f.write_u32(v)?; + self.f.write_char(']')?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(base)?; + self.f.write_fixed_size(" + ")?; + self.f.write_reg(index)?; + self.f.write_fixed_size(" * ")?; 
+ self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_fixed_size("]")?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + self.f.write_fixed_size("[")?; + self.f.write_reg(base)?; + self.f.write_fixed_size(" + ")?; + self.f.write_reg(index)?; + self.f.write_fixed_size(" * ")?; + self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_char(' ')?; + let mut v = disp as u32; + if disp < 0 { + self.f.write_fixed_size("- 0x")?; + v = -disp as u32; + } else { + self.f.write_fixed_size("+ 0x")?; + } + self.f.write_u32(v)?; + self.f.write_char(']')?; + self.f.write_char('{')?; + self.f.write_reg(mask_reg)?; + self.f.write_char('}')?; + Ok(()) + } + fn visit_absolute_far_address(&mut self, segment: u16, address: u32) -> Result { + self.f.write_prefixed_u16(segment)?; + self.f.write_fixed_size(":")?; + self.f.write_prefixed_u32(address)?; + Ok(()) + } + + + fn visit_other(&mut self) -> Result { + Ok(()) } } @@ -3365,155 +3640,140 @@ impl Instruction { } } -fn contextualize_intel(instr: &Instruction, colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { +pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt::Result { if instr.xacquire() { - out.write_str("xacquire ")?; + out.write_fixed_size("xacquire ")?; } if instr.xrelease() { - out.write_str("xrelease ")?; + out.write_fixed_size("xrelease ")?; } if instr.prefixes.lock() { - out.write_str("lock ")?; + out.write_fixed_size("lock ")?; } if instr.prefixes.rep_any() { if instr.opcode.can_rep() { if instr.prefixes.rep() { - write!(out, "rep ")?; + out.write_fixed_size("rep ")?; } else if instr.prefixes.repnz() { - write!(out, "repnz ")?; + out.write_fixed_size("repnz ")?; } } } - 
out.write_str(instr.opcode.name())?; - - if instr.opcode == Opcode::XBEGIN { - if (instr.imm as i32) >= 0 { - return write!(out, " $+{}", colors.number(signed_i32_hex(instr.imm as i32))); - } else { - return write!(out, " ${}", colors.number(signed_i32_hex(instr.imm as i32))); - } - } + out.write_opcode(instr.opcode)?; if instr.operand_count > 0 { - out.write_str(" ")?; - - let x = Operand::from_spec(instr, instr.operands[0]); - - const RELATIVE_BRANCHES: [Opcode; 21] = [ - Opcode::JMP, Opcode::JCXZ, - Opcode::LOOP, Opcode::LOOPZ, Opcode::LOOPNZ, - Opcode::JO, Opcode::JNO, - Opcode::JB, Opcode::JNB, - Opcode::JZ, Opcode::JNZ, - Opcode::JNA, Opcode::JA, - Opcode::JS, Opcode::JNS, - Opcode::JP, Opcode::JNP, - Opcode::JL, Opcode::JGE, - Opcode::JLE, Opcode::JG, - ]; - - if instr.operands[0] == OperandSpec::ImmI8 || instr.operands[0] == OperandSpec::ImmI32 { - if RELATIVE_BRANCHES.contains(&instr.opcode) { - return match x { - Operand::ImmediateI8(rel) => { - if rel >= 0 { - write!(out, "$+{}", colors.number(signed_i32_hex(rel as i32))) - } else { - write!(out, "${}", colors.number(signed_i32_hex(rel as i32))) - } - } - Operand::ImmediateI32(rel) => { - if rel >= 0 { - write!(out, "$+{}", colors.number(signed_i32_hex(rel))) - } else { - write!(out, "${}", colors.number(signed_i32_hex(rel))) - } - } - _ => { unreachable!() } - }; - } - } + out.write_fixed_size(" ")?; - if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; - out.write_str(" ")?; + if instr.visit_operand(0, &mut RelativeBranchPrinter { + inst: instr, + out, + })? 
{ + return Ok(()); } - if let Some(prefix) = instr.segment_override_for_op(0) { - write!(out, "{}:", prefix)?; + if instr.operands[0 as usize].is_memory() { + out.write_mem_size_label(instr.mem_size)?; + if let Some(prefix) = instr.segment_override_for_op(0) { + let name = prefix.name(); + out.write_char(' ')?; + out.write_char(name[0] as char)?; + out.write_char(name[1] as char)?; + out.write_fixed_size(":")?; + } else { + out.write_fixed_size(" ")?; + } } - x.colorize(colors, out)?; + + let mut displayer = DisplayingOperandVisitor { + f: out, + }; + instr.visit_operand(0 as u8, &mut displayer)?; for i in 1..instr.operand_count { - match instr.opcode { - _ => { - match &instr.operands[i as usize] { - &OperandSpec::Nothing => { - return Ok(()); - }, - _ => { - out.write_str(", ")?; - let x = Operand::from_spec(instr, instr.operands[i as usize]); - if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; - out.write_str(" ")?; + // don't worry about checking for `instr.operands[i] != Nothing`, it would be a bug to + // reach that while iterating only to `operand_count`.. 
+ out.write_fixed_size(", ")?; + if i >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } + + if instr.operands[i as usize].is_memory() { + out.write_mem_size_label(instr.mem_size)?; + if i >= 4 { + unsafe { core::hint::unreachable_unchecked(); } + } + if let Some(prefix) = instr.segment_override_for_op(i) { + let name = prefix.name(); + out.write_char(' ')?; + out.write_char(name[0] as char)?; + out.write_char(name[1] as char)?; + out.write_fixed_size(":")?; + } else { + out.write_fixed_size(" ")?; + } + } + + let mut displayer = DisplayingOperandVisitor { + f: out, + }; + + instr.visit_operand(i as u8, &mut displayer)?; + if let Some(evex) = instr.prefixes.evex() { + if evex.broadcast() && instr.operands[i as usize].is_memory() { + let scale = if instr.opcode == Opcode::VCVTPD2PS || instr.opcode == Opcode::VCVTTPD2UDQ || instr.opcode == Opcode::VCVTPD2UDQ || instr.opcode == Opcode::VCVTUDQ2PD || instr.opcode == Opcode::VCVTPS2PD || instr.opcode == Opcode::VCVTQQ2PS || instr.opcode == Opcode::VCVTDQ2PD || instr.opcode == Opcode::VCVTTPD2DQ || instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VFPCLASSPD || instr.opcode == Opcode::VCVTNEPS2BF16 || instr.opcode == Opcode::VCVTUQQ2PS || instr.opcode == Opcode::VCVTPD2DQ || instr.opcode == Opcode::VCVTTPS2UQQ || instr.opcode == Opcode::VCVTPS2UQQ || instr.opcode == Opcode::VCVTTPS2QQ || instr.opcode == Opcode::VCVTPS2QQ { + if instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VCVTNEPS2BF16 { + if evex.vex().l() { + 8 + } else if evex.lp() { + 16 + } else { + 4 } - if let Some(prefix) = instr.segment_override_for_op(i) { - write!(out, "{}:", prefix)?; + } else if instr.opcode == Opcode::VFPCLASSPD { + if evex.vex().l() { + 4 + } else if evex.lp() { + 8 + } else { + 2 } - x.colorize(colors, out)?; - if let Some(evex) = instr.prefixes.evex() { - if evex.broadcast() && x.is_memory() { - let scale = if instr.opcode == Opcode::VCVTPD2PS || instr.opcode == Opcode::VCVTTPD2UDQ || instr.opcode 
== Opcode::VCVTPD2UDQ || instr.opcode == Opcode::VCVTUDQ2PD || instr.opcode == Opcode::VCVTPS2PD || instr.opcode == Opcode::VCVTQQ2PS || instr.opcode == Opcode::VCVTDQ2PD || instr.opcode == Opcode::VCVTTPD2DQ || instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VFPCLASSPD || instr.opcode == Opcode::VCVTNEPS2BF16 || instr.opcode == Opcode::VCVTUQQ2PS || instr.opcode == Opcode::VCVTPD2DQ || instr.opcode == Opcode::VCVTTPS2UQQ || instr.opcode == Opcode::VCVTPS2UQQ || instr.opcode == Opcode::VCVTTPS2QQ || instr.opcode == Opcode::VCVTPS2QQ { - if instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VCVTNEPS2BF16 { - if evex.vex().l() { - 8 - } else if evex.lp() { - 16 - } else { - 4 - } - } else if instr.opcode == Opcode::VFPCLASSPD { - if evex.vex().l() { - 4 - } else if evex.lp() { - 8 - } else { - 2 - } - } else { - // vcvtpd2ps is "cool": in broadcast mode, it can read a - // double-precision float (qword), resize to single-precision, - // then broadcast that to the whole destination register. this - // means we need to show `xmm, qword [addr]{1to4}` if vector - // size is 256. likewise, scale of 8 for the same truncation - // reason if vector size is 512. - // vcvtudq2pd is the same story. - // vfpclassp{s,d} is a mystery to me. - if evex.vex().l() { - 4 - } else if evex.lp() { - 8 - } else { - 2 - } - } - } else { - // this should never be `None` - that would imply two - // memory operands for a broadcasted operation. - if let Some(width) = Operand::from_spec(instr, instr.operands[i as usize - 1]).width() { - width / instr.mem_size - } else { - 0 - } - }; - write!(out, "{{1to{}}}", scale)?; - } + } else { + // vcvtpd2ps is "cool": in broadcast mode, it can read a + // double-precision float (qword), resize to single-precision, + // then broadcast that to the whole destination register. this + // means we need to show `xmm, qword [addr]{1to4}` if vector + // size is 256. 
likewise, scale of 8 for the same truncation + // reason if vector size is 512. + // vcvtudq2pd is the same story. + // vfpclassp{s,d} is a mystery to me. + if evex.vex().l() { + 4 + } else if evex.lp() { + 8 + } else { + 2 } } + } else { + // this should never be `None` - that would imply two + // memory operands for a broadcasted operation. + if let Some(width) = Operand::from_spec(instr, instr.operands[i as usize - 1]).width() { + width / instr.mem_size + } else { + 0 + } + }; + out.write_fixed_size("{1to")?; + static STRING_LUT: &'static [&'static str] = &[ + "0", "1", "2", "3", "4", "5", "6", "7", "8", + "9", "10", "11", "12", "13", "14", "15", "16", + ]; + unsafe { + out.write_lt_16(STRING_LUT.get_kinda_unchecked(scale as usize))?; } + out.write_char('}')?; } } } @@ -3521,7 +3781,7 @@ fn contextualize_intel(instr: &Instruction, colors: Ok(()) } -fn contextualize_c(instr: &Instruction, colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { +pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) -> fmt::Result { let mut brace_count = 0; let mut prefixed = false; @@ -3545,7 +3805,7 @@ fn contextualize_c(instr: &Instruction, colors: &Y, } if instr.prefixes.rep_any() { - if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&instr.opcode) { + if instr.opcode.can_rep() { let word_str = match instr.mem_size { 1 => "byte", 2 => "word", @@ -3567,21 +3827,29 @@ fn contextualize_c(instr: &Instruction, colors: &Y, } } - fn write_jmp_operand(op: Operand, colors: &Y, out: &mut T) -> fmt::Result { + fn write_jmp_operand(op: Operand, out: &mut T) -> fmt::Result { + let mut out = yaxpeax_arch::display::FmtSink::new(out); + use core::fmt::Write; match op { Operand::ImmediateI8(rel) => { - if rel >= 0 { - write!(out, "$+{}", colors.number(signed_i32_hex(rel as i32))) + let rel = if rel >= 0 { + out.write_str("$+")?; + rel as u8 } else { - write!(out, "${}", colors.number(signed_i32_hex(rel as 
i32))) - } + out.write_str("$-")?; + rel.unsigned_abs() + }; + out.write_prefixed_u8(rel) } Operand::ImmediateI32(rel) => { - if rel >= 0 { - write!(out, "$+{}", colors.number(signed_i32_hex(rel))) + let rel = if rel >= 0 { + out.write_str("$+")?; + rel as u32 } else { - write!(out, "${}", colors.number(signed_i32_hex(rel))) - } + out.write_str("$-")?; + rel.unsigned_abs() + }; + out.write_prefixed_u32(rel) } other => { write!(out, "{}", other) @@ -3744,87 +4012,87 @@ fn contextualize_c(instr: &Instruction, colors: &Y, } Opcode::JMP => { out.write_str("jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JCXZ => { out.write_str("if cx == 0 then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::LOOP => { out.write_str("cx--; if cx != 0 then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::LOOPZ => { out.write_str("cx--; if cx != 0 and zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::LOOPNZ => { out.write_str("cx--; if cx != 0 and !zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JO => { out.write_str("if _(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNO => { out.write_str("if _(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JB => { out.write_str("if /* unsigned */ below(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNB => { out.write_str("if /* unsigned */ above_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + 
write_jmp_operand(instr.operand(0), out)?; }, Opcode::JZ => { out.write_str("if zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNZ => { out.write_str("if !zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNA => { out.write_str("if /* unsigned */ below_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JA => { out.write_str("if /* unsigned */ above(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JS => { out.write_str("if signed(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNS => { out.write_str("if !signed(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JP => { out.write_str("if parity(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JNP => { out.write_str("if !parity(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JL => { out.write_str("if /* signed */ less(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JGE => { out.write_str("if /* signed */ greater_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JLE => { out.write_str("if /* signed */ less_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::JG => { out.write_str("if /* signed */ 
greater(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), colors, out)?; + write_jmp_operand(instr.operand(0), out)?; }, Opcode::NOP => { write!(out, "nop")?; @@ -3856,18 +4124,20 @@ fn contextualize_c(instr: &Instruction, colors: &Y, } impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual for InstructionDisplayer<'instr> { - fn contextualize(&self, colors: &Y, address: u32, context: Option<&NoContext>, out: &mut T) -> fmt::Result { + fn contextualize(&self, _colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { let InstructionDisplayer { instr, style, } = self; + let mut out = yaxpeax_arch::display::FmtSink::new(out); + match style { DisplayStyle::Intel => { - contextualize_intel(instr, colors, address, context, out) + contextualize_intel(instr, &mut out) } DisplayStyle::C => { - contextualize_c(instr, colors, address, context, out) + contextualize_c(instr, &mut out) } } } @@ -3933,3 +4203,150 @@ impl ShowContextual { + inst: &'a Instruction, + out: &'a mut F, +} + +impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> { + // return true if we printed a relative branch offset, false otherwise + type Ok = bool; + // but errors are errors + type Error = fmt::Error; + + fn visit_reg(&mut self, _reg: RegSpec) -> Result { + Ok(false) + } + fn visit_deref(&mut self, _reg: RegSpec) -> Result { + Ok(false) + } + fn visit_disp(&mut self, _reg: RegSpec, _disp: i32) -> Result { + Ok(false) + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i8(&mut self, rel: i8) -> Result { + if RELATIVE_BRANCHES.contains(&self.inst.opcode) { + self.out.write_char('$')?; + // danger_anguished_string_write(self.out, "$"); + let mut v = rel as u8; + if rel < 0 { + self.out.write_char('-')?; + //danger_anguished_string_write(&mut self.out, "-"); + v = -rel as u8; + } else { + self.out.write_char('+')?; + // danger_anguished_string_write(&mut self.out, "+"); + } + self.out.write_fixed_size("0x")?; + 
self.out.write_u8(v)?; + Ok(true) + } else { + Ok(false) + } + } + #[cfg_attr(feature="profiling", inline(never))] + fn visit_i32(&mut self, rel: i32) -> Result { + if RELATIVE_BRANCHES.contains(&self.inst.opcode) || self.inst.opcode == Opcode::XBEGIN { + self.out.write_char('$')?; + // danger_anguished_string_write(self.out, "$"); + let mut v = rel as u32; + if rel < 0 { + self.out.write_char('-')?; + // danger_anguished_string_write(&mut self.out, "-"); + v = -rel as u32; + } else { + self.out.write_char('+')?; + // danger_anguished_string_write(&mut self.out, "+"); + } + self.out.write_fixed_size("0x")?; + self.out.write_u32(v)?; + Ok(true) + } else { + Ok(false) + } + } + fn visit_u8(&mut self, _imm: u8) -> Result { + Ok(false) + } + fn visit_i16(&mut self, _imm: i16) -> Result { + Ok(false) + } + fn visit_u16(&mut self, _imm: u16) -> Result { + Ok(false) + } + fn visit_u32(&mut self, _imm: u32) -> Result { + Ok(false) + } + fn visit_abs_u16(&mut self, _imm: u16) -> Result { + Ok(false) + } + fn visit_abs_u32(&mut self, _imm: u32) -> Result { + Ok(false) + } + fn visit_reg_scale(&mut self, _reg: RegSpec, _scale: u8) -> Result { + Ok(false) + } + fn visit_index_base_scale(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8) -> Result { + Ok(false) + } + fn visit_reg_scale_disp(&mut self, _reg: RegSpec, _scale: u8, _disp: i32) -> Result { + Ok(false) + } + fn visit_index_base_scale_disp(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32) -> Result { + Ok(false) + } + fn visit_other(&mut self) -> Result { + Ok(false) + } + fn visit_reg_mask_merge(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode) -> Result { + Ok(false) + } + fn visit_reg_mask_merge_sae(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode, _sae_mode: super::SaeMode) -> Result { + Ok(false) + } + fn visit_reg_mask_merge_sae_noround(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode) -> Result { + Ok(false) + } + fn 
visit_reg_disp_masked(&mut self, _spec: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_reg_deref_masked(&mut self, _spec: RegSpec, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_reg_scale_masked(&mut self, _spec: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_reg_scale_disp_masked(&mut self, _spec: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_masked(&mut self, _base: RegSpec, _index: RegSpec, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_scale_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_index_base_scale_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { + Ok(false) + } + fn visit_absolute_far_address(&mut self, _segment: u16, _address: u32) -> Result { + Ok(false) + } +} From 2ac46a98585b93f62961fdd82a1f2d1266761305 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 14:36:05 -0700 Subject: [PATCH 74/95] forward long deprecation allowances as appropriate --- src/long_mode/display.rs | 31 +- src/protected_mode/display.rs | 1473 +-------------------------------- src/real_mode/display.rs | 1473 +-------------------------------- 3 files changed, 95 insertions(+), 2882 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 4ffc431..f765fb7 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1,5 +1,7 @@ use core::fmt; +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; use crate::MEM_SIZE_STRINGS; @@ -254,10 +256,14 @@ impl fmt::Display for RegSpec { impl 
fmt::Display for Operand { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + // to reuse one implementation, call the deprecated function for now. + #[allow(deprecated)] self.colorize(&NoColors, fmt) } } +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] impl Colorize for Operand { fn colorize(&self, _colors: &Y, f: &mut T) -> fmt::Result { let mut f = yaxpeax_arch::display::FmtSink::new(f); @@ -2134,8 +2140,18 @@ impl Opcode { } } +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] impl Colorize for Opcode { - fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result { + fn colorize(&self, _colors: &Y, out: &mut T) -> fmt::Result { + out.write_str(self.name()) + // a way to preserve opcode colorization might be to see entering an opcode span, + // collecting text into a buffer, waiting for the span to exit, looking that up in a map + // for opcode types, and then picking a color. this really should be something like "opcode + // information" that can be looked up (including things like operand read/write behavior).. + // + // leaving this commented out as a reminder of what opcode to behavior mapping was like + /* match self { Opcode::VGF2P8AFFINEQB | Opcode::VGF2P8AFFINEINVQB | @@ -3561,17 +3577,22 @@ impl Colorize for Opcode { Opcode::UD2 | Opcode::Invalid => { write!(out, "{}", colors.invalid_op(self)) } } + */ } } impl fmt::Display for Instruction { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + // to reuse one implementation, call the deprecated function for now. + #[allow(deprecated)] self.display_with(DisplayStyle::Intel).colorize(&NoColors, fmt) } } impl<'instr> fmt::Display for InstructionDisplayer<'instr> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + // to reuse one implementation, call the deprecated function for now. 
+ #[allow(deprecated)] self.colorize(&NoColors, fmt) } } @@ -3618,6 +3639,8 @@ pub struct InstructionDisplayer<'instr> { * UPDATE: really wish i thought of DisplaySink back then, really wish this was bounded as T: * DisplaySink. */ +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] impl <'instr, T: fmt::Write, Y: YaxColors> Colorize for InstructionDisplayer<'instr> { fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result { // TODO: I DONT LIKE THIS, there is no address i can give contextualize here, @@ -4129,6 +4152,8 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) Ok(()) } +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual for InstructionDisplayer<'instr> { fn contextualize(&self, _colors: &Y, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { let InstructionDisplayer { @@ -4149,6 +4174,8 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual } } +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] #[cfg(feature="std")] impl ShowContextual], T, Y> for Instruction { fn contextualize(&self, colors: &Y, _address: u64, context: Option<&[Option]>, out: &mut T) -> fmt::Result { @@ -4161,7 +4188,7 @@ impl ShowContextual fmt::Result { + // to reuse one implementation, call the deprecated function for now. 
+ #[allow(deprecated)] self.colorize(&NoColors, fmt) } } +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] impl Colorize for Operand { - fn colorize(&self, colors: &Y, f: &mut T) -> fmt::Result { + fn colorize(&self, _colors: &Y, f: &mut T) -> fmt::Result { let mut f = yaxpeax_arch::display::FmtSink::new(f); let mut visitor = DisplayingOperandVisitor { f: &mut f @@ -2130,1456 +2136,27 @@ impl Opcode { } } +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] impl Colorize for Opcode { - fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result { - match self { - Opcode::VGF2P8AFFINEQB | - Opcode::VGF2P8AFFINEINVQB | - Opcode::VPSHRDQ | - Opcode::VPSHRDD | - Opcode::VPSHRDW | - Opcode::VPSHLDQ | - Opcode::VPSHLDD | - Opcode::VPSHLDW | - Opcode::VBROADCASTF32X8 | - Opcode::VBROADCASTF64X4 | - Opcode::VBROADCASTF32X4 | - Opcode::VBROADCASTF64X2 | - Opcode::VBROADCASTF32X2 | - Opcode::VBROADCASTI32X8 | - Opcode::VBROADCASTI64X4 | - Opcode::VBROADCASTI32X4 | - Opcode::VBROADCASTI64X2 | - Opcode::VBROADCASTI32X2 | - Opcode::VEXTRACTI32X8 | - Opcode::VEXTRACTF32X8 | - Opcode::VINSERTI32X8 | - Opcode::VINSERTF32X8 | - Opcode::VINSERTI32X4 | - Opcode::V4FNMADDSS | - Opcode::V4FNMADDPS | - Opcode::VCVTNEPS2BF16 | - Opcode::V4FMADDSS | - Opcode::V4FMADDPS | - Opcode::VCVTNE2PS2BF16 | - Opcode::VP2INTERSECTD | - Opcode::VP2INTERSECTQ | - Opcode::VP4DPWSSDS | - Opcode::VP4DPWSSD | - Opcode::VPDPWSSDS | - Opcode::VPDPWSSD | - Opcode::VPDPBUSDS | - Opcode::VDPBF16PS | - Opcode::VPBROADCASTMW2D | - Opcode::VPBROADCASTMB2Q | - Opcode::VPMOVD2M | - Opcode::VPMOVQD | - Opcode::VPMOVWB | - Opcode::VPMOVDB | - Opcode::VPMOVDW | - Opcode::VPMOVQB | - Opcode::VPMOVQW | - Opcode::VGF2P8MULB | - Opcode::VPMADD52HUQ | - Opcode::VPMADD52LUQ | - Opcode::VPSHUFBITQMB | - Opcode::VPERMB | - Opcode::VPEXPANDD | - Opcode::VPEXPANDQ | - Opcode::VPABSQ | - Opcode::VPRORVD | - 
Opcode::VPRORVQ | - Opcode::VPMULTISHIFTQB | - Opcode::VPERMT2B | - Opcode::VPERMT2W | - Opcode::VPSHRDVQ | - Opcode::VPSHRDVD | - Opcode::VPSHRDVW | - Opcode::VPSHLDVQ | - Opcode::VPSHLDVD | - Opcode::VPSHLDVW | - Opcode::VPCOMPRESSB | - Opcode::VPCOMPRESSW | - Opcode::VPEXPANDB | - Opcode::VPEXPANDW | - Opcode::VPOPCNTD | - Opcode::VPOPCNTQ | - Opcode::VPOPCNTB | - Opcode::VPOPCNTW | - Opcode::VSCALEFSS | - Opcode::VSCALEFSD | - Opcode::VSCALEFPS | - Opcode::VSCALEFPD | - Opcode::VPDPBUSD | - Opcode::VCVTUSI2SD | - Opcode::VCVTUSI2SS | - Opcode::VPXORD | - Opcode::VPXORQ | - Opcode::VPORD | - Opcode::VPORQ | - Opcode::VPANDND | - Opcode::VPANDNQ | - Opcode::VPANDD | - Opcode::VPANDQ | - - Opcode::VHADDPS | - Opcode::VHSUBPS | - Opcode::VADDSUBPS | - Opcode::VADDPD | - Opcode::VADDPS | - Opcode::VADDSD | - Opcode::VADDSS | - Opcode::VADDSUBPD | - Opcode::VFMADD132PD | - Opcode::VFMADD132PS | - Opcode::VFMADD132SD | - Opcode::VFMADD132SS | - Opcode::VFMADD213PD | - Opcode::VFMADD213PS | - Opcode::VFMADD213SD | - Opcode::VFMADD213SS | - Opcode::VFMADD231PD | - Opcode::VFMADD231PS | - Opcode::VFMADD231SD | - Opcode::VFMADD231SS | - Opcode::VFMADDSUB132PD | - Opcode::VFMADDSUB132PS | - Opcode::VFMADDSUB213PD | - Opcode::VFMADDSUB213PS | - Opcode::VFMADDSUB231PD | - Opcode::VFMADDSUB231PS | - Opcode::VFMSUB132PD | - Opcode::VFMSUB132PS | - Opcode::VFMSUB132SD | - Opcode::VFMSUB132SS | - Opcode::VFMSUB213PD | - Opcode::VFMSUB213PS | - Opcode::VFMSUB213SD | - Opcode::VFMSUB213SS | - Opcode::VFMSUB231PD | - Opcode::VFMSUB231PS | - Opcode::VFMSUB231SD | - Opcode::VFMSUB231SS | - Opcode::VFMSUBADD132PD | - Opcode::VFMSUBADD132PS | - Opcode::VFMSUBADD213PD | - Opcode::VFMSUBADD213PS | - Opcode::VFMSUBADD231PD | - Opcode::VFMSUBADD231PS | - Opcode::VFNMADD132PD | - Opcode::VFNMADD132PS | - Opcode::VFNMADD132SD | - Opcode::VFNMADD132SS | - Opcode::VFNMADD213PD | - Opcode::VFNMADD213PS | - Opcode::VFNMADD213SD | - Opcode::VFNMADD213SS | - Opcode::VFNMADD231PD | - 
Opcode::VFNMADD231PS | - Opcode::VFNMADD231SD | - Opcode::VFNMADD231SS | - Opcode::VFNMSUB132PD | - Opcode::VFNMSUB132PS | - Opcode::VFNMSUB132SD | - Opcode::VFNMSUB132SS | - Opcode::VFNMSUB213PD | - Opcode::VFNMSUB213PS | - Opcode::VFNMSUB213SD | - Opcode::VFNMSUB213SS | - Opcode::VFNMSUB231PD | - Opcode::VFNMSUB231PS | - Opcode::VFNMSUB231SD | - Opcode::VFNMSUB231SS | - Opcode::VDIVPD | - Opcode::VDIVPS | - Opcode::VDIVSD | - Opcode::VDIVSS | - Opcode::VHADDPD | - Opcode::VHSUBPD | - Opcode::HADDPD | - Opcode::HSUBPD | - Opcode::VMULPD | - Opcode::VMULPS | - Opcode::VMULSD | - Opcode::VMULSS | - Opcode::VPABSB | - Opcode::VPABSD | - Opcode::VPABSW | - Opcode::PABSB | - Opcode::PABSD | - Opcode::PABSW | - Opcode::VPSIGNB | - Opcode::VPSIGND | - Opcode::VPSIGNW | - Opcode::PSIGNB | - Opcode::PSIGND | - Opcode::PSIGNW | - Opcode::VPADDB | - Opcode::VPADDD | - Opcode::VPADDQ | - Opcode::VPADDSB | - Opcode::VPADDSW | - Opcode::VPADDUSB | - Opcode::VPADDUSW | - Opcode::VPADDW | - Opcode::VPAVGB | - Opcode::VPAVGW | - Opcode::VPMULDQ | - Opcode::VPMULHRSW | - Opcode::VPMULHUW | - Opcode::VPMULHW | - Opcode::VPMULLQ | - Opcode::VPMULLD | - Opcode::VPMULLW | - Opcode::VPMULUDQ | - Opcode::PCLMULQDQ | - Opcode::PMULDQ | - Opcode::PMULHRSW | - Opcode::PMULLD | - Opcode::VPSUBB | - Opcode::VPSUBD | - Opcode::VPSUBQ | - Opcode::VPSUBSB | - Opcode::VPSUBSW | - Opcode::VPSUBUSB | - Opcode::VPSUBUSW | - Opcode::VPSUBW | - Opcode::VROUNDPD | - Opcode::VROUNDPS | - Opcode::VEXP2PD | - Opcode::VEXP2PS | - Opcode::VEXP2SD | - Opcode::VEXP2SS | - Opcode::VRCP28PD | - Opcode::VRCP28PS | - Opcode::VRCP28SD | - Opcode::VRCP28SS | - Opcode::VRCP14PD | - Opcode::VRCP14PS | - Opcode::VRCP14SD | - Opcode::VRCP14SS | - Opcode::VRNDSCALEPD | - Opcode::VRNDSCALEPS | - Opcode::VRNDSCALESD | - Opcode::VRNDSCALESS | - Opcode::VRSQRT14PD | - Opcode::VRSQRT14PS | - Opcode::VRSQRT14SD | - Opcode::VRSQRT14SS | - Opcode::VSCALEDPD | - Opcode::VSCALEDPS | - Opcode::VSCALEDSD | - Opcode::VSCALEDSS | - 
Opcode::VRSQRT28PD | - Opcode::VRSQRT28PS | - Opcode::VRSQRT28SD | - Opcode::VRSQRT28SS | - Opcode::VRSQRTPS | - Opcode::VSQRTPD | - Opcode::VSQRTPS | - Opcode::VSUBPD | - Opcode::VSUBPS | - Opcode::VSUBSD | - Opcode::VSUBSS | - Opcode::VRCPSS | - Opcode::VROUNDSD | - Opcode::VROUNDSS | - Opcode::ROUNDPD | - Opcode::ROUNDPS | - Opcode::ROUNDSD | - Opcode::ROUNDSS | - Opcode::VRSQRTSS | - Opcode::VSQRTSD | - Opcode::VSQRTSS | - Opcode::VPSADBW | - Opcode::VMPSADBW | - Opcode::VDBPSADBW | - Opcode::VPHADDD | - Opcode::VPHADDSW | - Opcode::VPHADDW | - Opcode::VPHSUBD | - Opcode::VPHSUBSW | - Opcode::VPHSUBW | - Opcode::VPMADDUBSW | - Opcode::VPMADDWD | - Opcode::VDPPD | - Opcode::VDPPS | - Opcode::VRCPPS | - Opcode::VORPD | - Opcode::VORPS | - Opcode::VANDPD | - Opcode::VANDPS | - Opcode::VANDNPD | - Opcode::VANDNPS | - Opcode::VPAND | - Opcode::VPANDN | - Opcode::VPOR | - Opcode::VPXOR | - Opcode::VXORPD | - Opcode::VXORPS | - Opcode::VPSLLD | - Opcode::VPSLLDQ | - Opcode::VPSLLQ | - Opcode::VPSLLVD | - Opcode::VPSLLVQ | - Opcode::VPSLLW | - Opcode::VPROLD | - Opcode::VPROLQ | - Opcode::VPROLVD | - Opcode::VPROLVQ | - Opcode::VPRORD | - Opcode::VPRORQ | - Opcode::VPRORRD | - Opcode::VPRORRQ | - Opcode::VPSLLVW | - Opcode::VPSRAQ | - Opcode::VPSRAVQ | - Opcode::VPSRAVW | - Opcode::VPSRLVW | - Opcode::VPSRAD | - Opcode::VPSRAVD | - Opcode::VPSRAW | - Opcode::VPSRLD | - Opcode::VPSRLDQ | - Opcode::VPSRLQ | - Opcode::VPSRLVD | - Opcode::VPSRLVQ | - Opcode::VPSRLW | - Opcode::PHADDD | - Opcode::PHADDSW | - Opcode::PHADDW | - Opcode::PHSUBD | - Opcode::PHSUBSW | - Opcode::PHSUBW | - Opcode::PMADDUBSW | - Opcode::ADDSUBPD | - Opcode::DPPS | - Opcode::DPPD | - Opcode::MPSADBW | - Opcode::RCPSS | - Opcode::RSQRTSS | - Opcode::SQRTSD | - Opcode::ADDSD | - Opcode::SUBSD | - Opcode::MULSD | - Opcode::DIVSD | - Opcode::SQRTSS | - Opcode::ADDSS | - Opcode::SUBSS | - Opcode::MULSS | - Opcode::DIVSS | - Opcode::HADDPS | - Opcode::HSUBPS | - Opcode::ADDSUBPS | - Opcode::PMULHRW | - 
Opcode::PFRCP | - Opcode::PFRSQRT | - Opcode::PFSUB | - Opcode::PFADD | - Opcode::PFRCPIT1 | - Opcode::PFRSQIT1 | - Opcode::PFSUBR | - Opcode::PFACC | - Opcode::PFMUL | - Opcode::PFMULHRW | - Opcode::PFRCPIT2 | - Opcode::PFNACC | - Opcode::PFPNACC | - Opcode::PSWAPD | - Opcode::PAVGUSB | - Opcode::XADD| - Opcode::DIV | - Opcode::IDIV | - Opcode::MUL | - Opcode::MULX | - Opcode::NEG | - Opcode::NOT | - Opcode::SAR | - Opcode::SAL | - Opcode::SHR | - Opcode::SARX | - Opcode::SHLX | - Opcode::SHRX | - Opcode::SHRD | - Opcode::SHL | - Opcode::RCR | - Opcode::RCL | - Opcode::ROR | - Opcode::RORX | - Opcode::ROL | - Opcode::INC | - Opcode::DEC | - Opcode::SBB | - Opcode::AND | - Opcode::XOR | - Opcode::OR | - Opcode::LEA | - Opcode::ADD | - Opcode::ADC | - Opcode::ADCX | - Opcode::ADOX | - Opcode::SUB | - Opcode::POPCNT | - Opcode::LZCNT | - Opcode::VPLZCNTD | - Opcode::VPLZCNTQ | - Opcode::BT | - Opcode::BTS | - Opcode::BTR | - Opcode::BTC | - Opcode::BSF | - Opcode::BSR | - Opcode::BZHI | - Opcode::PDEP | - Opcode::PEXT | - Opcode::TZCNT | - Opcode::ANDN | - Opcode::BEXTR | - Opcode::BLSI | - Opcode::BLSMSK | - Opcode::BLSR | - Opcode::ADDPS | - Opcode::ADDPD | - Opcode::ANDNPS | - Opcode::ANDNPD | - Opcode::ANDPS | - Opcode::ANDPD | - Opcode::COMISD | - Opcode::COMISS | - Opcode::DIVPS | - Opcode::DIVPD | - Opcode::MULPS | - Opcode::MULPD | - Opcode::ORPS | - Opcode::ORPD | - Opcode::PADDB | - Opcode::PADDD | - Opcode::PADDQ | - Opcode::PADDSB | - Opcode::PADDSW | - Opcode::PADDUSB | - Opcode::PADDUSW | - Opcode::PADDW | - Opcode::PAND | - Opcode::PANDN | - Opcode::PAVGB | - Opcode::PAVGW | - Opcode::PMADDWD | - Opcode::PMULHUW | - Opcode::PMULHW | - Opcode::PMULLW | - Opcode::PMULUDQ | - Opcode::POR | - Opcode::PSADBW | - Opcode::PSHUFD | - Opcode::PSHUFW | - Opcode::PSHUFB | - Opcode::PSLLD | - Opcode::PSLLDQ | - Opcode::PSLLQ | - Opcode::PSLLW | - Opcode::PSRAD | - Opcode::PSRAW | - Opcode::PSRLD | - Opcode::PSRLDQ | - Opcode::PSRLQ | - Opcode::PSRLW | - 
Opcode::PSUBB | - Opcode::PSUBD | - Opcode::PSUBQ | - Opcode::PSUBSB | - Opcode::PSUBSW | - Opcode::PSUBUSB | - Opcode::PSUBUSW | - Opcode::PSUBW | - Opcode::PXOR | - Opcode::RSQRTPS | - Opcode::SQRTPS | - Opcode::SQRTPD | - Opcode::SUBPS | - Opcode::SUBPD | - Opcode::XORPS | - Opcode::XORPD | - Opcode::RCPPS | - Opcode::SHLD | - Opcode::SLHD | - Opcode::UCOMISD | - Opcode::UCOMISS | - Opcode::F2XM1 | - Opcode::FABS | - Opcode::FADD | - Opcode::FADDP | - Opcode::FCHS | - Opcode::FCOS | - Opcode::FDIV | - Opcode::FDIVP | - Opcode::FDIVR | - Opcode::FDIVRP | - Opcode::FIADD | - Opcode::FIDIV | - Opcode::FIDIVR | - Opcode::FIMUL | - Opcode::FISUB | - Opcode::FISUBR | - Opcode::FMUL | - Opcode::FMULP | - Opcode::FNCLEX | - Opcode::FNINIT | - Opcode::FPATAN | - Opcode::FPREM | - Opcode::FPREM1 | - Opcode::FPTAN | - Opcode::FRNDINT | - Opcode::FSCALE | - Opcode::FSIN | - Opcode::FSINCOS | - Opcode::FSQRT | - Opcode::FSUB | - Opcode::FSUBP | - Opcode::FSUBR | - Opcode::FSUBRP | - Opcode::FXTRACT | - Opcode::FYL2X | - Opcode::FYL2XP1 | - Opcode::AAA | - Opcode::AAS | - Opcode::DAS | - Opcode::DAA | - Opcode::AAD | - Opcode::AAM | - Opcode::KADDB | - Opcode::KANDB | - Opcode::KANDNB | - Opcode::KNOTB | - Opcode::KORB | - Opcode::KSHIFTLB | - Opcode::KSHIFTRB | - Opcode::KXNORB | - Opcode::KXORB | - Opcode::KADDW | - Opcode::KANDW | - Opcode::KANDNW | - Opcode::KNOTW | - Opcode::KORW | - Opcode::KSHIFTLW | - Opcode::KSHIFTRW | - Opcode::KXNORW | - Opcode::KXORW | - Opcode::KADDD | - Opcode::KANDD | - Opcode::KANDND | - Opcode::KNOTD | - Opcode::KORD | - Opcode::KSHIFTLD | - Opcode::KSHIFTRD | - Opcode::KXNORD | - Opcode::KXORD | - Opcode::KADDQ | - Opcode::KANDQ | - Opcode::KANDNQ | - Opcode::KNOTQ | - Opcode::KORQ | - Opcode::KSHIFTLQ | - Opcode::KSHIFTRQ | - Opcode::KXNORQ | - Opcode::KXORQ | - Opcode::IMUL => { write!(out, "{}", colors.arithmetic_op(self)) } - Opcode::POPF | - Opcode::PUSHF | - Opcode::ENTER | - Opcode::LEAVE | - Opcode::PUSHA | - Opcode::POPA | - 
Opcode::PUSH | - Opcode::POP => { write!(out, "{}", colors.stack_op(self)) } - Opcode::WAIT | - Opcode::FNOP | - Opcode::FDISI8087_NOP | - Opcode::FENI8087_NOP | - Opcode::FSETPM287_NOP | - Opcode::PREFETCHNTA | - Opcode::PREFETCH0 | - Opcode::PREFETCH1 | - Opcode::PREFETCH2 | - Opcode::PREFETCHW | - Opcode::NOP => { write!(out, "{}", colors.nop_op(self)) } - - /* Control flow */ - Opcode::HLT | - Opcode::INT | - Opcode::INTO | - Opcode::IRET | - Opcode::IRETD | - Opcode::IRETQ | - Opcode::RETF | - Opcode::RETURN => { write!(out, "{}", colors.stop_op(self)) } - Opcode::LOOPNZ | - Opcode::LOOPZ | - Opcode::LOOP | - Opcode::JECXZ | - Opcode::CALL | - Opcode::CALLF | - Opcode::JMP | - Opcode::JMPF | - Opcode::JO | - Opcode::JNO | - Opcode::JB | - Opcode::JNB | - Opcode::JZ | - Opcode::JNZ | - Opcode::JA | - Opcode::JNA | - Opcode::JS | - Opcode::JNS | - Opcode::JP | - Opcode::JNP | - Opcode::JL | - Opcode::JGE | - Opcode::JLE | - Opcode::JG => { write!(out, "{}", colors.control_flow_op(self)) } - - /* Data transfer */ - Opcode::PI2FW | - Opcode::PI2FD | - Opcode::PF2ID | - Opcode::PF2IW | - Opcode::VCVTDQ2PD | - Opcode::VCVTDQ2PS | - Opcode::VCVTPD2DQ | - Opcode::VCVTPD2PS | - Opcode::VCVTPH2PS | - Opcode::VCVTPS2DQ | - Opcode::VCVTPS2PD | - Opcode::VCVTPS2PH | - Opcode::VCVTTPD2DQ | - Opcode::VCVTTPS2DQ | - Opcode::VCVTSD2SI | - Opcode::VCVTSD2SS | - Opcode::VCVTSI2SD | - Opcode::VCVTSI2SS | - Opcode::VCVTSS2SD | - Opcode::VCVTSS2SI | - Opcode::VCVTTSD2SI | - Opcode::VCVTTSS2SI | - Opcode::VCVTPD2UDQ | - Opcode::VCVTTPD2UDQ | - Opcode::VCVTPS2UDQ | - Opcode::VCVTTPS2UDQ | - Opcode::VCVTQQ2PD | - Opcode::VCVTQQ2PS | - Opcode::VCVTSD2USI | - Opcode::VCVTTSD2USI | - Opcode::VCVTSS2USI | - Opcode::VCVTTSS2USI | - Opcode::VCVTUDQ2PD | - Opcode::VCVTUDQ2PS | - Opcode::VCVTUSI2USD | - Opcode::VCVTUSI2USS | - Opcode::VCVTTPD2QQ | - Opcode::VCVTPD2QQ | - Opcode::VCVTTPD2UQQ | - Opcode::VCVTPD2UQQ | - Opcode::VCVTTPS2QQ | - Opcode::VCVTPS2QQ | - Opcode::VCVTTPS2UQQ | - 
Opcode::VCVTPS2UQQ | - Opcode::VCVTUQQ2PD | - Opcode::VCVTUQQ2PS | - Opcode::VMOVDDUP | - Opcode::VPSHUFLW | - Opcode::VPSHUFHW | - Opcode::VBLENDMPD | - Opcode::VBLENDMPS | - Opcode::VPBLENDMD | - Opcode::VPBLENDMQ | - Opcode::VBLENDPD | - Opcode::VBLENDPS | - Opcode::VBLENDVPD | - Opcode::VBLENDVPS | - Opcode::VPBLENDMB | - Opcode::VPBLENDMW | - Opcode::PBLENDVB | - Opcode::PBLENDW | - Opcode::BLENDPD | - Opcode::BLENDPS | - Opcode::BLENDVPD | - Opcode::BLENDVPS | - Opcode::BLENDW | - Opcode::VBROADCASTF128 | - Opcode::VBROADCASTI128 | - Opcode::VBROADCASTSD | - Opcode::VBROADCASTSS | - Opcode::VPBROADCASTM | - Opcode::VEXTRACTF128 | - Opcode::VEXTRACTI128 | - Opcode::VEXTRACTPS | - Opcode::EXTRACTPS | - Opcode::VGATHERDPD | - Opcode::VGATHERDPS | - Opcode::VGATHERQPD | - Opcode::VGATHERQPS | - Opcode::VGATHERPF0DPD | - Opcode::VGATHERPF0DPS | - Opcode::VGATHERPF0QPD | - Opcode::VGATHERPF0QPS | - Opcode::VGATHERPF1DPD | - Opcode::VGATHERPF1DPS | - Opcode::VGATHERPF1QPD | - Opcode::VGATHERPF1QPS | - Opcode::VSCATTERDD | - Opcode::VSCATTERDQ | - Opcode::VSCATTERQD | - Opcode::VSCATTERQQ | - Opcode::VPSCATTERDD | - Opcode::VPSCATTERDQ | - Opcode::VPSCATTERQD | - Opcode::VPSCATTERQQ | - Opcode::VSCATTERPF0DPD | - Opcode::VSCATTERPF0DPS | - Opcode::VSCATTERPF0QPD | - Opcode::VSCATTERPF0QPS | - Opcode::VSCATTERPF1DPD | - Opcode::VSCATTERPF1DPS | - Opcode::VSCATTERPF1QPD | - Opcode::VSCATTERPF1QPS | - Opcode::VINSERTF128 | - Opcode::VINSERTI128 | - Opcode::VINSERTPS | - Opcode::INSERTPS | - Opcode::VEXTRACTF32X4 | - Opcode::VEXTRACTF64X2 | - Opcode::VEXTRACTF64X4 | - Opcode::VEXTRACTI32X4 | - Opcode::VEXTRACTI64X2 | - Opcode::VEXTRACTI64X4 | - Opcode::VINSERTF32X4 | - Opcode::VINSERTF64X2 | - Opcode::VINSERTF64X4 | - Opcode::VINSERTI64X2 | - Opcode::VINSERTI64X4 | - Opcode::VSHUFF32X4 | - Opcode::VSHUFF64X2 | - Opcode::VSHUFI32X4 | - Opcode::VSHUFI64X2 | - Opcode::VMASKMOVDQU | - Opcode::VMASKMOVPD | - Opcode::VMASKMOVPS | - Opcode::VMOVAPD | - Opcode::VMOVAPS | - 
Opcode::VMOVD | - Opcode::VMOVDQA | - Opcode::VMOVDQU | - Opcode::VMOVHLPS | - Opcode::VMOVHPD | - Opcode::VMOVHPS | - Opcode::VMOVLHPS | - Opcode::VMOVLPD | - Opcode::VMOVLPS | - Opcode::VMOVMSKPD | - Opcode::VMOVMSKPS | - Opcode::VMOVNTDQ | - Opcode::VMOVNTDQA | - Opcode::VMOVNTPD | - Opcode::VMOVNTPS | - Opcode::MOVDIR64B | - Opcode::MOVDIRI | - Opcode::MOVNTDQA | - Opcode::VMOVQ | - Opcode::VMOVSHDUP | - Opcode::VMOVSLDUP | - Opcode::VMOVUPD | - Opcode::VMOVUPS | - Opcode::VMOVSD | - Opcode::VMOVSS | - Opcode::VMOVDQA32 | - Opcode::VMOVDQA64 | - Opcode::VMOVDQU32 | - Opcode::VMOVDQU64 | - Opcode::VPMOVM2B | - Opcode::VPMOVM2W | - Opcode::VPMOVB2M | - Opcode::VPMOVW2M | - Opcode::VPMOVSWB | - Opcode::VPMOVUSWB | - Opcode::VPMOVSQB | - Opcode::VPMOVUSQB | - Opcode::VPMOVSQW | - Opcode::VPMOVUSQW | - Opcode::VPMOVSQD | - Opcode::VPMOVUSQD | - Opcode::VPMOVSDB | - Opcode::VPMOVUSDB | - Opcode::VPMOVSDW | - Opcode::VPMOVUSDW | - Opcode::VPMOVM2D | - Opcode::VPMOVM2Q | - Opcode::VPMOVB2D | - Opcode::VPMOVQ2M | - Opcode::VMOVDQU8 | - Opcode::VMOVDQU16 | - - Opcode::VPBLENDD | - Opcode::VPBLENDVB | - Opcode::VPBLENDW | - Opcode::VPBROADCASTB | - Opcode::VPBROADCASTD | - Opcode::VPBROADCASTQ | - Opcode::VPBROADCASTW | - Opcode::VPGATHERDD | - Opcode::VPGATHERDQ | - Opcode::VPGATHERQD | - Opcode::VPGATHERQQ | - Opcode::VPCLMULQDQ | - Opcode::VPMOVMSKB | - Opcode::VPMOVSXBD | - Opcode::VPMOVSXBQ | - Opcode::VPMOVSXBW | - Opcode::VPMOVSXDQ | - Opcode::VPMOVSXWD | - Opcode::VPMOVSXWQ | - Opcode::VPMOVZXBD | - Opcode::VPMOVZXBQ | - Opcode::VPMOVZXBW | - Opcode::VPMOVZXDQ | - Opcode::VPMOVZXWD | - Opcode::VPMOVZXWQ | - Opcode::PMOVSXBD | - Opcode::PMOVSXBQ | - Opcode::PMOVSXBW | - Opcode::PMOVSXDQ | - Opcode::PMOVSXWD | - Opcode::PMOVSXWQ | - Opcode::PMOVZXBD | - Opcode::PMOVZXBQ | - Opcode::PMOVZXBW | - Opcode::PMOVZXDQ | - Opcode::PMOVZXWD | - Opcode::PMOVZXWQ | - Opcode::KUNPCKBW | - Opcode::KUNPCKWD | - Opcode::KUNPCKDQ | - Opcode::VUNPCKHPD | - Opcode::VUNPCKHPS | - 
Opcode::VUNPCKLPD | - Opcode::VUNPCKLPS | - Opcode::VPUNPCKHBW | - Opcode::VPUNPCKHDQ | - Opcode::VPUNPCKHQDQ | - Opcode::VPUNPCKHWD | - Opcode::VPUNPCKLBW | - Opcode::VPUNPCKLDQ | - Opcode::VPUNPCKLQDQ | - Opcode::VPUNPCKLWD | - Opcode::VSHUFPD | - Opcode::VSHUFPS | - Opcode::VPACKSSDW | - Opcode::VPACKUSDW | - Opcode::PACKUSDW | - Opcode::VPACKSSWB | - Opcode::VPACKUSWB | - Opcode::VALIGND | - Opcode::VALIGNQ | - Opcode::VPALIGNR | - Opcode::PALIGNR | - Opcode::VPERM2F128 | - Opcode::VPERM2I128 | - Opcode::VPERMD | - Opcode::VPERMILPD | - Opcode::VPERMILPS | - Opcode::VPERMPD | - Opcode::VPERMPS | - Opcode::VPERMQ | - Opcode::VPERMI2D | - Opcode::VPERMI2Q | - Opcode::VPERMI2PD | - Opcode::VPERMI2PS | - Opcode::VPERMT2D | - Opcode::VPERMT2Q | - Opcode::VPERMT2PD | - Opcode::VPERMT2PS | - Opcode::VPERMI2B | - Opcode::VPERMI2W | - Opcode::VPERMW | - Opcode::VPEXTRB | - Opcode::VPEXTRD | - Opcode::VPEXTRQ | - Opcode::VPEXTRW | - Opcode::PEXTRB | - Opcode::PEXTRD | - Opcode::PEXTRQ | - Opcode::EXTRQ | - Opcode::PINSRB | - Opcode::PINSRD | - Opcode::PINSRQ | - Opcode::INSERTQ | - Opcode::VPINSRB | - Opcode::VPINSRD | - Opcode::VPINSRQ | - Opcode::VPINSRW | - Opcode::VPMASKMOVD | - Opcode::VPMASKMOVQ | - Opcode::VCOMPRESSPD | - Opcode::VCOMPRESSPS | - Opcode::VPCOMPRESSQ | - Opcode::VPCOMPRESSD | - Opcode::VEXPANDPD | - Opcode::VEXPANDPS | - Opcode::VPSHUFB | - Opcode::VPSHUFD | - Opcode::VPHMINPOSUW | - Opcode::PHMINPOSUW | - Opcode::VZEROUPPER | - Opcode::VZEROALL | - Opcode::VFIXUPIMMPD | - Opcode::VFIXUPIMMPS | - Opcode::VFIXUPIMMSD | - Opcode::VFIXUPIMMSS | - Opcode::VREDUCEPD | - Opcode::VREDUCEPS | - Opcode::VREDUCESD | - Opcode::VREDUCESS | - Opcode::VGETEXPPD | - Opcode::VGETEXPPS | - Opcode::VGETEXPSD | - Opcode::VGETEXPSS | - Opcode::VGETMANTPD | - Opcode::VGETMANTPS | - Opcode::VGETMANTSD | - Opcode::VGETMANTSS | - Opcode::VLDDQU | - Opcode::BSWAP | - Opcode::CVTDQ2PD | - Opcode::CVTDQ2PS | - Opcode::CVTPS2DQ | - Opcode::CVTPD2DQ | - Opcode::CVTPI2PS | - 
Opcode::CVTPI2PD | - Opcode::CVTPS2PD | - Opcode::CVTPD2PS | - Opcode::CVTPS2PI | - Opcode::CVTPD2PI | - Opcode::CVTSD2SI | - Opcode::CVTSD2SS | - Opcode::CVTSI2SD | - Opcode::CVTSI2SS | - Opcode::CVTSS2SD | - Opcode::CVTSS2SI | - Opcode::CVTTPD2DQ | - Opcode::CVTTPS2DQ | - Opcode::CVTTPS2PI | - Opcode::CVTTPD2PI | - Opcode::CVTTSD2SI | - Opcode::CVTTSS2SI | - Opcode::MASKMOVQ | - Opcode::MASKMOVDQU | - Opcode::MOVAPS | - Opcode::MOVAPD | - Opcode::MOVD | - Opcode::MOVHPS | - Opcode::MOVHPD | - Opcode::MOVHLPS | - Opcode::MOVLPS | - Opcode::MOVLPD | - Opcode::MOVLHPS | - Opcode::MOVMSKPS | - Opcode::MOVMSKPD | - Opcode::MOVNTI | - Opcode::MOVNTPS | - Opcode::MOVNTPD | - Opcode::MOVNTSS | - Opcode::MOVNTSD | - Opcode::MOVNTQ | - Opcode::MOVNTDQ | - Opcode::MOVSD | - Opcode::MOVSS | - Opcode::MOVUPD | - Opcode::PSHUFHW | - Opcode::PSHUFLW | - Opcode::PUNPCKHBW | - Opcode::PUNPCKHDQ | - Opcode::PUNPCKHWD | - Opcode::PUNPCKLBW | - Opcode::PUNPCKLDQ | - Opcode::PUNPCKLWD | - Opcode::PUNPCKLQDQ | - Opcode::PUNPCKHQDQ | - Opcode::PACKSSDW | - Opcode::PACKSSWB | - Opcode::PACKUSWB | - Opcode::UNPCKHPS | - Opcode::UNPCKHPD | - Opcode::UNPCKLPS | - Opcode::UNPCKLPD | - Opcode::SHUFPD | - Opcode::SHUFPS | - Opcode::PMOVMSKB | - Opcode::KMOVB | - Opcode::KMOVW | - Opcode::KMOVD | - Opcode::KMOVQ | - Opcode::BNDMOV | - Opcode::LDDQU | - Opcode::CMC | - Opcode::CLC | - Opcode::CLI | - Opcode::CLD | - Opcode::STC | - Opcode::STI | - Opcode::STD | - Opcode::CBW | - Opcode::CWDE | - Opcode::CDQE | - Opcode::CWD | - Opcode::CDQ | - Opcode::CQO | - Opcode::MOVDDUP | - Opcode::MOVSLDUP | - Opcode::MOVDQ2Q | - Opcode::MOVDQU | - Opcode::MOVDQA | - Opcode::MOVQ | - Opcode::MOVQ2DQ | - Opcode::MOVSHDUP | - Opcode::MOVUPS | - Opcode::PEXTRW | - Opcode::PINSRW | - Opcode::MOV | - Opcode::MOVBE | - Opcode::LODS | - Opcode::STOS | - Opcode::LAHF | - Opcode::SAHF | - Opcode::MOVS | - Opcode::INS | - Opcode::IN | - Opcode::OUTS | - Opcode::OUT | - Opcode::MOVZX | - Opcode::MOVSX | - 
Opcode::MOVSXD | - Opcode::FILD | - Opcode::FBLD | - Opcode::FBSTP | - Opcode::FIST | - Opcode::FISTP | - Opcode::FISTTP | - Opcode::FLD | - Opcode::FLD1 | - Opcode::FLDCW | - Opcode::FLDENV | - Opcode::FLDL2E | - Opcode::FLDL2T | - Opcode::FLDLG2 | - Opcode::FLDLN2 | - Opcode::FLDPI | - Opcode::FLDZ | - Opcode::FST | - Opcode::FSTP | - Opcode::FSTPNCE | - Opcode::FNSAVE | - Opcode::FNSTCW | - Opcode::FNSTENV | - Opcode::FNSTOR | - Opcode::FNSTSW | - Opcode::FRSTOR | - Opcode::FXCH | - Opcode::XCHG | - Opcode::XLAT | - Opcode::CMOVA | - Opcode::CMOVB | - Opcode::CMOVG | - Opcode::CMOVGE | - Opcode::CMOVL | - Opcode::CMOVLE | - Opcode::CMOVNA | - Opcode::CMOVNB | - Opcode::CMOVNO | - Opcode::CMOVNP | - Opcode::CMOVNS | - Opcode::CMOVNZ | - Opcode::CMOVO | - Opcode::CMOVP | - Opcode::CMOVS | - Opcode::CMOVZ | - Opcode::FCMOVB | - Opcode::FCMOVBE | - Opcode::FCMOVE | - Opcode::FCMOVNB | - Opcode::FCMOVNBE | - Opcode::FCMOVNE | - Opcode::FCMOVNU | - Opcode::FCMOVU | - Opcode::SALC | - Opcode::SETO | - Opcode::SETNO | - Opcode::SETB | - Opcode::SETAE | - Opcode::SETZ | - Opcode::SETNZ | - Opcode::SETBE | - Opcode::SETA | - Opcode::SETS | - Opcode::SETNS | - Opcode::SETP | - Opcode::SETNP | - Opcode::SETL | - Opcode::SETGE | - Opcode::SETLE | - Opcode::SETG => { write!(out, "{}", colors.data_op(self)) } - - Opcode::VCOMISD | - Opcode::VCOMISS | - Opcode::VUCOMISD | - Opcode::VUCOMISS | - Opcode::KORTESTB | - Opcode::KTESTB | - Opcode::KORTESTW | - Opcode::KTESTW | - Opcode::KORTESTD | - Opcode::KTESTD | - Opcode::KORTESTQ | - Opcode::KTESTQ | - Opcode::VPTESTNMD | - Opcode::VPTESTNMQ | - Opcode::VPTERNLOGD | - Opcode::VPTERNLOGQ | - Opcode::VPTESTMD | - Opcode::VPTESTMQ | - Opcode::VPTESTNMB | - Opcode::VPTESTNMW | - Opcode::VPTESTMB | - Opcode::VPTESTMW | - Opcode::VPCMPD | - Opcode::VPCMPUD | - Opcode::VPCMPQ | - Opcode::VPCMPUQ | - Opcode::VPCMPB | - Opcode::VPCMPUB | - Opcode::VPCMPW | - Opcode::VPCMPUW | - Opcode::VCMPPD | - Opcode::VCMPPS | - Opcode::VCMPSD | - 
Opcode::VCMPSS | - Opcode::VMAXPD | - Opcode::VMAXPS | - Opcode::VMAXSD | - Opcode::VMAXSS | - Opcode::VPMAXSQ | - Opcode::VPMAXUQ | - Opcode::VPMINSQ | - Opcode::VPMINUQ | - Opcode::VMINPD | - Opcode::VMINPS | - Opcode::VMINSD | - Opcode::VMINSS | - Opcode::VPCMPEQB | - Opcode::VPCMPEQD | - Opcode::VPCMPEQQ | - Opcode::VPCMPEQW | - Opcode::VPCMPGTB | - Opcode::VPCMPGTD | - Opcode::VPCMPGTQ | - Opcode::VPCMPGTW | - Opcode::VPCMPESTRI | - Opcode::VPCMPESTRM | - Opcode::VPCMPISTRI | - Opcode::VPCMPISTRM | - Opcode::VPMAXSB | - Opcode::VPMAXSD | - Opcode::VPMAXSW | - Opcode::VPMAXUB | - Opcode::VPMAXUW | - Opcode::VPMAXUD | - Opcode::VPMINSB | - Opcode::VPMINSW | - Opcode::VPMINSD | - Opcode::VPMINUB | - Opcode::VPMINUW | - Opcode::VPMINUD | - Opcode::VFPCLASSPD | - Opcode::VFPCLASSPS | - Opcode::VFPCLASSSD | - Opcode::VFPCLASSSS | - Opcode::VRANGEPD | - Opcode::VRANGEPS | - Opcode::VRANGESD | - Opcode::VRANGESS | - Opcode::VPCONFLICTD | - Opcode::VPCONFLICTQ | - Opcode::VPTEST | - Opcode::VTESTPD | - Opcode::VTESTPS | - Opcode::PCMPEQB | - Opcode::PCMPEQD | - Opcode::PCMPEQQ | - Opcode::PCMPEQW | - Opcode::PCMPESTRI | - Opcode::PCMPESTRM | - Opcode::PCMPGTB | - Opcode::PCMPGTD | - Opcode::PCMPGTQ | - Opcode::PCMPGTW | - Opcode::PCMPISTRI | - Opcode::PCMPISTRM | - Opcode::PTEST | - Opcode::MAXPD | - Opcode::MAXPS | - Opcode::MAXSD | - Opcode::MAXSS | - Opcode::MINPD | - Opcode::MINPS | - Opcode::MINSD | - Opcode::MINSS | - Opcode::PMAXSB | - Opcode::PMAXSD | - Opcode::PMAXSW | - Opcode::PMAXUB | - Opcode::PMAXUD | - Opcode::PMAXUW | - Opcode::PMINSB | - Opcode::PMINSD | - Opcode::PMINSW | - Opcode::PMINUB | - Opcode::PMINUD | - Opcode::PMINUW | - Opcode::PFCMPGE | - Opcode::PFMIN | - Opcode::PFCMPGT | - Opcode::PFMAX | - Opcode::PFCMPEQ | - Opcode::CMPS | - Opcode::SCAS | - Opcode::TEST | - Opcode::FTST | - Opcode::FXAM | - Opcode::FUCOM | - Opcode::FUCOMI | - Opcode::FUCOMIP | - Opcode::FUCOMP | - Opcode::FUCOMPP | - Opcode::FCOM | - Opcode::FCOMI | - Opcode::FCOMIP 
| - Opcode::FCOMP | - Opcode::FCOMPP | - Opcode::FICOM | - Opcode::FICOMP | - Opcode::CMPSD | - Opcode::CMPSS | - Opcode::CMP | - Opcode::CMPPS | - Opcode::CMPPD | - Opcode::CMPXCHG8B | - Opcode::CMPXCHG16B | - Opcode::CMPXCHG => { write!(out, "{}", colors.comparison_op(self)) } - - Opcode::WRMSR | - Opcode::RDMSR | - Opcode::RDTSC | - Opcode::RDPMC | - Opcode::RDPID | - Opcode::RDFSBASE | - Opcode::RDGSBASE | - Opcode::WRFSBASE | - Opcode::WRGSBASE | - Opcode::FXSAVE | - Opcode::FXRSTOR | - Opcode::LDMXCSR | - Opcode::STMXCSR | - Opcode::VLDMXCSR | - Opcode::VSTMXCSR | - Opcode::XSAVE | - Opcode::XSAVEC | - Opcode::XSAVES | - Opcode::XSAVEC64 | - Opcode::XSAVES64 | - Opcode::XRSTOR | - Opcode::XRSTORS | - Opcode::XRSTORS64 | - Opcode::XSAVEOPT | - Opcode::LFENCE | - Opcode::MFENCE | - Opcode::SFENCE | - Opcode::CLFLUSH | - Opcode::CLFLUSHOPT | - Opcode::CLWB | - Opcode::LDS | - Opcode::LES | - Opcode::SGDT | - Opcode::SIDT | - Opcode::LGDT | - Opcode::LIDT | - Opcode::SMSW | - Opcode::LMSW | - Opcode::SWAPGS | - Opcode::RDTSCP | - Opcode::INVEPT | - Opcode::INVVPID | - Opcode::INVPCID | - Opcode::INVLPG | - Opcode::INVLPGA | - Opcode::INVLPGB | - Opcode::TLBSYNC | - Opcode::PSMASH | - Opcode::PVALIDATE | - Opcode::RMPADJUST | - Opcode::RMPUPDATE | - Opcode::CPUID | - Opcode::WBINVD | - Opcode::INVD | - Opcode::SYSRET | - Opcode::CLTS | - Opcode::SYSCALL | - Opcode::TDCALL | - Opcode::SEAMRET | - Opcode::SEAMOPS | - Opcode::SEAMCALL | - Opcode::TPAUSE | - Opcode::UMONITOR | - Opcode::UMWAIT | - Opcode::LSL | - Opcode::SLDT | - Opcode::STR | - Opcode::LLDT | - Opcode::LTR | - Opcode::VERR | - Opcode::VERW | - Opcode::JMPE | - Opcode::EMMS | - Opcode::FEMMS | - Opcode::GETSEC | - Opcode::LFS | - Opcode::LGS | - Opcode::LSS | - Opcode::RSM | - Opcode::SYSENTER | - Opcode::SYSEXIT | - Opcode::VMREAD | - Opcode::VMWRITE | - Opcode::VMCLEAR | - Opcode::VMPTRLD | - Opcode::VMPTRST | - Opcode::VMXON | - Opcode::VMCALL | - Opcode::VMLAUNCH | - Opcode::VMRESUME | - 
Opcode::VMLOAD | - Opcode::VMMCALL | - Opcode::VMSAVE | - Opcode::VMRUN | - Opcode::VMXOFF | - Opcode::PCONFIG | - Opcode::MONITOR | - Opcode::MWAIT | - Opcode::MONITORX | - Opcode::MWAITX | - Opcode::SKINIT | - Opcode::CLGI | - Opcode::STGI | - Opcode::CLAC | - Opcode::STAC | - Opcode::ENCLS | - Opcode::ENCLV | - Opcode::XGETBV | - Opcode::XSETBV | - Opcode::VMFUNC | - Opcode::XEND | - Opcode::XTEST | - Opcode::XABORT | - Opcode::XBEGIN | - Opcode::ENCLU | - Opcode::RDPKRU | - Opcode::WRPKRU | - Opcode::RDPRU | - Opcode::CLZERO | - Opcode::ENQCMD | - Opcode::ENQCMDS | - Opcode::PTWRITE | - Opcode::UIRET | - Opcode::TESTUI | - Opcode::CLUI | - Opcode::STUI | - Opcode::SENDUIPI | - Opcode::XSUSLDTRK | - Opcode::XRESLDTRK | - Opcode::BOUND | - Opcode::ARPL | - Opcode::BNDMK | - Opcode::BNDCL | - Opcode::BNDCU | - Opcode::BNDCN | - Opcode::BNDLDX | - Opcode::BNDSTX | - Opcode::LAR => { write!(out, "{}", colors.platform_op(self)) } - - Opcode::CRC32 | - Opcode::RDSEED | - Opcode::RDRAND | - Opcode::SHA1RNDS4 | - Opcode::SHA1NEXTE | - Opcode::SHA1MSG1 | - Opcode::SHA1MSG2 | - Opcode::SHA256RNDS2 | - Opcode::SHA256MSG1 | - Opcode::SHA256MSG2 | - Opcode::FFREE | - Opcode::FFREEP | - Opcode::FDECSTP | - Opcode::FINCSTP | - Opcode::GF2P8MULB | - Opcode::GF2P8AFFINEQB | - Opcode::GF2P8AFFINEINVQB | - Opcode::AESDEC128KL | - Opcode::AESDEC256KL | - Opcode::AESDECWIDE128KL | - Opcode::AESDECWIDE256KL | - Opcode::AESENC128KL | - Opcode::AESENC256KL | - Opcode::AESENCWIDE128KL | - Opcode::AESENCWIDE256KL | - Opcode::ENCODEKEY128 | - Opcode::ENCODEKEY256 | - Opcode::LOADIWKEY | - Opcode::HRESET | - Opcode::WRUSS | - Opcode::WRSS | - Opcode::INCSSP | - Opcode::SAVEPREVSSP | - Opcode::SETSSBSY | - Opcode::CLRSSBSY | - Opcode::RSTORSSP | - Opcode::ENDBR64 | - Opcode::ENDBR32 | - Opcode::AESDEC | - Opcode::AESDECLAST | - Opcode::AESENC | - Opcode::AESENCLAST | - Opcode::AESIMC | - Opcode::AESKEYGENASSIST | - Opcode::VAESDEC | - Opcode::VAESDECLAST | - Opcode::VAESENC | - 
Opcode::VAESENCLAST | - Opcode::VAESIMC | - Opcode::VAESKEYGENASSIST => { write!(out, "{}", colors.misc_op(self)) } - - Opcode::UD0 | - Opcode::UD1 | - Opcode::UD2 | - Opcode::Invalid => { write!(out, "{}", colors.invalid_op(self)) } - } + fn colorize(&self, _colors: &Y, out: &mut T) -> fmt::Result { + // see `impl Colorize for long_mode::Opcode for more about this + out.write_str(self.name()) } } impl fmt::Display for Instruction { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + // to reuse one implementation, call the deprecated function for now. + #[allow(deprecated)] self.display_with(DisplayStyle::Intel).colorize(&NoColors, fmt) } } impl<'instr> fmt::Display for InstructionDisplayer<'instr> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + // to reuse one implementation, call the deprecated function for now. + #[allow(deprecated)] self.colorize(&NoColors, fmt) } } @@ -3623,6 +2200,8 @@ pub struct InstructionDisplayer<'instr> { * so write to some Write thing i guess. bite me. i really just want to * stop thinking about how to support printing instructions... */ +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] impl <'instr, T: fmt::Write, Y: YaxColors> Colorize for InstructionDisplayer<'instr> { fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result { // TODO: I DONT LIKE THIS, there is no address i can give contextualize here, @@ -3635,8 +2214,18 @@ impl <'instr, T: fmt::Write, Y: YaxColors> Colorize for InstructionDisplay struct NoContext; impl Instruction { + /// format this instruction into `out` as a plain text string. 
+ #[cfg_attr(feature="profiling", inline(never))] pub fn write_to(&self, out: &mut T) -> fmt::Result { - self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) + let mut out = yaxpeax_arch::display::FmtSink::new(out); + contextualize_intel(self, &mut out) + } + + /// format this instruction into `out`, which may perform additional styling based on its + /// `DisplaySink` implementation. + #[cfg_attr(feature="profiling", inline(never))] + pub fn display_into(&self, out: &mut T) -> fmt::Result { + contextualize_intel(self, out) } } @@ -4123,6 +2712,8 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) Ok(()) } +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual for InstructionDisplayer<'instr> { fn contextualize(&self, _colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { let InstructionDisplayer { @@ -4143,6 +2734,8 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual } } +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] #[cfg(feature="std")] impl ShowContextual], T, Y> for Instruction { fn contextualize(&self, colors: &Y, _address: u64, context: Option<&[Option]>, out: &mut T) -> fmt::Result { diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index f574499..b76f47e 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -1,5 +1,7 @@ use core::fmt; +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; use crate::MEM_SIZE_STRINGS; @@ -251,12 +253,16 @@ impl fmt::Display for RegSpec { impl fmt::Display for Operand { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + // to reuse one implementation, call the deprecated function for now. 
+ #[allow(deprecated)] self.colorize(&NoColors, fmt) } } +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] impl Colorize for Operand { - fn colorize(&self, colors: &Y, f: &mut T) -> fmt::Result { + fn colorize(&self, _colors: &Y, f: &mut T) -> fmt::Result { let mut f = yaxpeax_arch::display::FmtSink::new(f); let mut visitor = DisplayingOperandVisitor { f: &mut f @@ -2130,1456 +2136,27 @@ impl Opcode { } } +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] impl Colorize for Opcode { - fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result { - match self { - Opcode::VGF2P8AFFINEQB | - Opcode::VGF2P8AFFINEINVQB | - Opcode::VPSHRDQ | - Opcode::VPSHRDD | - Opcode::VPSHRDW | - Opcode::VPSHLDQ | - Opcode::VPSHLDD | - Opcode::VPSHLDW | - Opcode::VBROADCASTF32X8 | - Opcode::VBROADCASTF64X4 | - Opcode::VBROADCASTF32X4 | - Opcode::VBROADCASTF64X2 | - Opcode::VBROADCASTF32X2 | - Opcode::VBROADCASTI32X8 | - Opcode::VBROADCASTI64X4 | - Opcode::VBROADCASTI32X4 | - Opcode::VBROADCASTI64X2 | - Opcode::VBROADCASTI32X2 | - Opcode::VEXTRACTI32X8 | - Opcode::VEXTRACTF32X8 | - Opcode::VINSERTI32X8 | - Opcode::VINSERTF32X8 | - Opcode::VINSERTI32X4 | - Opcode::V4FNMADDSS | - Opcode::V4FNMADDPS | - Opcode::VCVTNEPS2BF16 | - Opcode::V4FMADDSS | - Opcode::V4FMADDPS | - Opcode::VCVTNE2PS2BF16 | - Opcode::VP2INTERSECTD | - Opcode::VP2INTERSECTQ | - Opcode::VP4DPWSSDS | - Opcode::VP4DPWSSD | - Opcode::VPDPWSSDS | - Opcode::VPDPWSSD | - Opcode::VPDPBUSDS | - Opcode::VDPBF16PS | - Opcode::VPBROADCASTMW2D | - Opcode::VPBROADCASTMB2Q | - Opcode::VPMOVD2M | - Opcode::VPMOVQD | - Opcode::VPMOVWB | - Opcode::VPMOVDB | - Opcode::VPMOVDW | - Opcode::VPMOVQB | - Opcode::VPMOVQW | - Opcode::VGF2P8MULB | - Opcode::VPMADD52HUQ | - Opcode::VPMADD52LUQ | - Opcode::VPSHUFBITQMB | - Opcode::VPERMB | - Opcode::VPEXPANDD | - Opcode::VPEXPANDQ | - Opcode::VPABSQ | - Opcode::VPRORVD | - 
Opcode::VPRORVQ | - Opcode::VPMULTISHIFTQB | - Opcode::VPERMT2B | - Opcode::VPERMT2W | - Opcode::VPSHRDVQ | - Opcode::VPSHRDVD | - Opcode::VPSHRDVW | - Opcode::VPSHLDVQ | - Opcode::VPSHLDVD | - Opcode::VPSHLDVW | - Opcode::VPCOMPRESSB | - Opcode::VPCOMPRESSW | - Opcode::VPEXPANDB | - Opcode::VPEXPANDW | - Opcode::VPOPCNTD | - Opcode::VPOPCNTQ | - Opcode::VPOPCNTB | - Opcode::VPOPCNTW | - Opcode::VSCALEFSS | - Opcode::VSCALEFSD | - Opcode::VSCALEFPS | - Opcode::VSCALEFPD | - Opcode::VPDPBUSD | - Opcode::VCVTUSI2SD | - Opcode::VCVTUSI2SS | - Opcode::VPXORD | - Opcode::VPXORQ | - Opcode::VPORD | - Opcode::VPORQ | - Opcode::VPANDND | - Opcode::VPANDNQ | - Opcode::VPANDD | - Opcode::VPANDQ | - - Opcode::VHADDPS | - Opcode::VHSUBPS | - Opcode::VADDSUBPS | - Opcode::VADDPD | - Opcode::VADDPS | - Opcode::VADDSD | - Opcode::VADDSS | - Opcode::VADDSUBPD | - Opcode::VFMADD132PD | - Opcode::VFMADD132PS | - Opcode::VFMADD132SD | - Opcode::VFMADD132SS | - Opcode::VFMADD213PD | - Opcode::VFMADD213PS | - Opcode::VFMADD213SD | - Opcode::VFMADD213SS | - Opcode::VFMADD231PD | - Opcode::VFMADD231PS | - Opcode::VFMADD231SD | - Opcode::VFMADD231SS | - Opcode::VFMADDSUB132PD | - Opcode::VFMADDSUB132PS | - Opcode::VFMADDSUB213PD | - Opcode::VFMADDSUB213PS | - Opcode::VFMADDSUB231PD | - Opcode::VFMADDSUB231PS | - Opcode::VFMSUB132PD | - Opcode::VFMSUB132PS | - Opcode::VFMSUB132SD | - Opcode::VFMSUB132SS | - Opcode::VFMSUB213PD | - Opcode::VFMSUB213PS | - Opcode::VFMSUB213SD | - Opcode::VFMSUB213SS | - Opcode::VFMSUB231PD | - Opcode::VFMSUB231PS | - Opcode::VFMSUB231SD | - Opcode::VFMSUB231SS | - Opcode::VFMSUBADD132PD | - Opcode::VFMSUBADD132PS | - Opcode::VFMSUBADD213PD | - Opcode::VFMSUBADD213PS | - Opcode::VFMSUBADD231PD | - Opcode::VFMSUBADD231PS | - Opcode::VFNMADD132PD | - Opcode::VFNMADD132PS | - Opcode::VFNMADD132SD | - Opcode::VFNMADD132SS | - Opcode::VFNMADD213PD | - Opcode::VFNMADD213PS | - Opcode::VFNMADD213SD | - Opcode::VFNMADD213SS | - Opcode::VFNMADD231PD | - 
Opcode::VFNMADD231PS | - Opcode::VFNMADD231SD | - Opcode::VFNMADD231SS | - Opcode::VFNMSUB132PD | - Opcode::VFNMSUB132PS | - Opcode::VFNMSUB132SD | - Opcode::VFNMSUB132SS | - Opcode::VFNMSUB213PD | - Opcode::VFNMSUB213PS | - Opcode::VFNMSUB213SD | - Opcode::VFNMSUB213SS | - Opcode::VFNMSUB231PD | - Opcode::VFNMSUB231PS | - Opcode::VFNMSUB231SD | - Opcode::VFNMSUB231SS | - Opcode::VDIVPD | - Opcode::VDIVPS | - Opcode::VDIVSD | - Opcode::VDIVSS | - Opcode::VHADDPD | - Opcode::VHSUBPD | - Opcode::HADDPD | - Opcode::HSUBPD | - Opcode::VMULPD | - Opcode::VMULPS | - Opcode::VMULSD | - Opcode::VMULSS | - Opcode::VPABSB | - Opcode::VPABSD | - Opcode::VPABSW | - Opcode::PABSB | - Opcode::PABSD | - Opcode::PABSW | - Opcode::VPSIGNB | - Opcode::VPSIGND | - Opcode::VPSIGNW | - Opcode::PSIGNB | - Opcode::PSIGND | - Opcode::PSIGNW | - Opcode::VPADDB | - Opcode::VPADDD | - Opcode::VPADDQ | - Opcode::VPADDSB | - Opcode::VPADDSW | - Opcode::VPADDUSB | - Opcode::VPADDUSW | - Opcode::VPADDW | - Opcode::VPAVGB | - Opcode::VPAVGW | - Opcode::VPMULDQ | - Opcode::VPMULHRSW | - Opcode::VPMULHUW | - Opcode::VPMULHW | - Opcode::VPMULLQ | - Opcode::VPMULLD | - Opcode::VPMULLW | - Opcode::VPMULUDQ | - Opcode::PCLMULQDQ | - Opcode::PMULDQ | - Opcode::PMULHRSW | - Opcode::PMULLD | - Opcode::VPSUBB | - Opcode::VPSUBD | - Opcode::VPSUBQ | - Opcode::VPSUBSB | - Opcode::VPSUBSW | - Opcode::VPSUBUSB | - Opcode::VPSUBUSW | - Opcode::VPSUBW | - Opcode::VROUNDPD | - Opcode::VROUNDPS | - Opcode::VEXP2PD | - Opcode::VEXP2PS | - Opcode::VEXP2SD | - Opcode::VEXP2SS | - Opcode::VRCP28PD | - Opcode::VRCP28PS | - Opcode::VRCP28SD | - Opcode::VRCP28SS | - Opcode::VRCP14PD | - Opcode::VRCP14PS | - Opcode::VRCP14SD | - Opcode::VRCP14SS | - Opcode::VRNDSCALEPD | - Opcode::VRNDSCALEPS | - Opcode::VRNDSCALESD | - Opcode::VRNDSCALESS | - Opcode::VRSQRT14PD | - Opcode::VRSQRT14PS | - Opcode::VRSQRT14SD | - Opcode::VRSQRT14SS | - Opcode::VSCALEDPD | - Opcode::VSCALEDPS | - Opcode::VSCALEDSD | - Opcode::VSCALEDSS | - 
Opcode::VRSQRT28PD | - Opcode::VRSQRT28PS | - Opcode::VRSQRT28SD | - Opcode::VRSQRT28SS | - Opcode::VRSQRTPS | - Opcode::VSQRTPD | - Opcode::VSQRTPS | - Opcode::VSUBPD | - Opcode::VSUBPS | - Opcode::VSUBSD | - Opcode::VSUBSS | - Opcode::VRCPSS | - Opcode::VROUNDSD | - Opcode::VROUNDSS | - Opcode::ROUNDPD | - Opcode::ROUNDPS | - Opcode::ROUNDSD | - Opcode::ROUNDSS | - Opcode::VRSQRTSS | - Opcode::VSQRTSD | - Opcode::VSQRTSS | - Opcode::VPSADBW | - Opcode::VMPSADBW | - Opcode::VDBPSADBW | - Opcode::VPHADDD | - Opcode::VPHADDSW | - Opcode::VPHADDW | - Opcode::VPHSUBD | - Opcode::VPHSUBSW | - Opcode::VPHSUBW | - Opcode::VPMADDUBSW | - Opcode::VPMADDWD | - Opcode::VDPPD | - Opcode::VDPPS | - Opcode::VRCPPS | - Opcode::VORPD | - Opcode::VORPS | - Opcode::VANDPD | - Opcode::VANDPS | - Opcode::VANDNPD | - Opcode::VANDNPS | - Opcode::VPAND | - Opcode::VPANDN | - Opcode::VPOR | - Opcode::VPXOR | - Opcode::VXORPD | - Opcode::VXORPS | - Opcode::VPSLLD | - Opcode::VPSLLDQ | - Opcode::VPSLLQ | - Opcode::VPSLLVD | - Opcode::VPSLLVQ | - Opcode::VPSLLW | - Opcode::VPROLD | - Opcode::VPROLQ | - Opcode::VPROLVD | - Opcode::VPROLVQ | - Opcode::VPRORD | - Opcode::VPRORQ | - Opcode::VPRORRD | - Opcode::VPRORRQ | - Opcode::VPSLLVW | - Opcode::VPSRAQ | - Opcode::VPSRAVQ | - Opcode::VPSRAVW | - Opcode::VPSRLVW | - Opcode::VPSRAD | - Opcode::VPSRAVD | - Opcode::VPSRAW | - Opcode::VPSRLD | - Opcode::VPSRLDQ | - Opcode::VPSRLQ | - Opcode::VPSRLVD | - Opcode::VPSRLVQ | - Opcode::VPSRLW | - Opcode::PHADDD | - Opcode::PHADDSW | - Opcode::PHADDW | - Opcode::PHSUBD | - Opcode::PHSUBSW | - Opcode::PHSUBW | - Opcode::PMADDUBSW | - Opcode::ADDSUBPD | - Opcode::DPPS | - Opcode::DPPD | - Opcode::MPSADBW | - Opcode::RCPSS | - Opcode::RSQRTSS | - Opcode::SQRTSD | - Opcode::ADDSD | - Opcode::SUBSD | - Opcode::MULSD | - Opcode::DIVSD | - Opcode::SQRTSS | - Opcode::ADDSS | - Opcode::SUBSS | - Opcode::MULSS | - Opcode::DIVSS | - Opcode::HADDPS | - Opcode::HSUBPS | - Opcode::ADDSUBPS | - Opcode::PMULHRW | - 
Opcode::PFRCP | - Opcode::PFRSQRT | - Opcode::PFSUB | - Opcode::PFADD | - Opcode::PFRCPIT1 | - Opcode::PFRSQIT1 | - Opcode::PFSUBR | - Opcode::PFACC | - Opcode::PFMUL | - Opcode::PFMULHRW | - Opcode::PFRCPIT2 | - Opcode::PFNACC | - Opcode::PFPNACC | - Opcode::PSWAPD | - Opcode::PAVGUSB | - Opcode::XADD| - Opcode::DIV | - Opcode::IDIV | - Opcode::MUL | - Opcode::MULX | - Opcode::NEG | - Opcode::NOT | - Opcode::SAR | - Opcode::SAL | - Opcode::SHR | - Opcode::SARX | - Opcode::SHLX | - Opcode::SHRX | - Opcode::SHRD | - Opcode::SHL | - Opcode::RCR | - Opcode::RCL | - Opcode::ROR | - Opcode::RORX | - Opcode::ROL | - Opcode::INC | - Opcode::DEC | - Opcode::SBB | - Opcode::AND | - Opcode::XOR | - Opcode::OR | - Opcode::LEA | - Opcode::ADD | - Opcode::ADC | - Opcode::ADCX | - Opcode::ADOX | - Opcode::SUB | - Opcode::POPCNT | - Opcode::LZCNT | - Opcode::VPLZCNTD | - Opcode::VPLZCNTQ | - Opcode::BT | - Opcode::BTS | - Opcode::BTR | - Opcode::BTC | - Opcode::BSF | - Opcode::BSR | - Opcode::BZHI | - Opcode::PDEP | - Opcode::PEXT | - Opcode::TZCNT | - Opcode::ANDN | - Opcode::BEXTR | - Opcode::BLSI | - Opcode::BLSMSK | - Opcode::BLSR | - Opcode::ADDPS | - Opcode::ADDPD | - Opcode::ANDNPS | - Opcode::ANDNPD | - Opcode::ANDPS | - Opcode::ANDPD | - Opcode::COMISD | - Opcode::COMISS | - Opcode::DIVPS | - Opcode::DIVPD | - Opcode::MULPS | - Opcode::MULPD | - Opcode::ORPS | - Opcode::ORPD | - Opcode::PADDB | - Opcode::PADDD | - Opcode::PADDQ | - Opcode::PADDSB | - Opcode::PADDSW | - Opcode::PADDUSB | - Opcode::PADDUSW | - Opcode::PADDW | - Opcode::PAND | - Opcode::PANDN | - Opcode::PAVGB | - Opcode::PAVGW | - Opcode::PMADDWD | - Opcode::PMULHUW | - Opcode::PMULHW | - Opcode::PMULLW | - Opcode::PMULUDQ | - Opcode::POR | - Opcode::PSADBW | - Opcode::PSHUFD | - Opcode::PSHUFW | - Opcode::PSHUFB | - Opcode::PSLLD | - Opcode::PSLLDQ | - Opcode::PSLLQ | - Opcode::PSLLW | - Opcode::PSRAD | - Opcode::PSRAW | - Opcode::PSRLD | - Opcode::PSRLDQ | - Opcode::PSRLQ | - Opcode::PSRLW | - 
Opcode::PSUBB | - Opcode::PSUBD | - Opcode::PSUBQ | - Opcode::PSUBSB | - Opcode::PSUBSW | - Opcode::PSUBUSB | - Opcode::PSUBUSW | - Opcode::PSUBW | - Opcode::PXOR | - Opcode::RSQRTPS | - Opcode::SQRTPS | - Opcode::SQRTPD | - Opcode::SUBPS | - Opcode::SUBPD | - Opcode::XORPS | - Opcode::XORPD | - Opcode::RCPPS | - Opcode::SHLD | - Opcode::SLHD | - Opcode::UCOMISD | - Opcode::UCOMISS | - Opcode::F2XM1 | - Opcode::FABS | - Opcode::FADD | - Opcode::FADDP | - Opcode::FCHS | - Opcode::FCOS | - Opcode::FDIV | - Opcode::FDIVP | - Opcode::FDIVR | - Opcode::FDIVRP | - Opcode::FIADD | - Opcode::FIDIV | - Opcode::FIDIVR | - Opcode::FIMUL | - Opcode::FISUB | - Opcode::FISUBR | - Opcode::FMUL | - Opcode::FMULP | - Opcode::FNCLEX | - Opcode::FNINIT | - Opcode::FPATAN | - Opcode::FPREM | - Opcode::FPREM1 | - Opcode::FPTAN | - Opcode::FRNDINT | - Opcode::FSCALE | - Opcode::FSIN | - Opcode::FSINCOS | - Opcode::FSQRT | - Opcode::FSUB | - Opcode::FSUBP | - Opcode::FSUBR | - Opcode::FSUBRP | - Opcode::FXTRACT | - Opcode::FYL2X | - Opcode::FYL2XP1 | - Opcode::AAA | - Opcode::AAS | - Opcode::DAS | - Opcode::DAA | - Opcode::AAD | - Opcode::AAM | - Opcode::KADDB | - Opcode::KANDB | - Opcode::KANDNB | - Opcode::KNOTB | - Opcode::KORB | - Opcode::KSHIFTLB | - Opcode::KSHIFTRB | - Opcode::KXNORB | - Opcode::KXORB | - Opcode::KADDW | - Opcode::KANDW | - Opcode::KANDNW | - Opcode::KNOTW | - Opcode::KORW | - Opcode::KSHIFTLW | - Opcode::KSHIFTRW | - Opcode::KXNORW | - Opcode::KXORW | - Opcode::KADDD | - Opcode::KANDD | - Opcode::KANDND | - Opcode::KNOTD | - Opcode::KORD | - Opcode::KSHIFTLD | - Opcode::KSHIFTRD | - Opcode::KXNORD | - Opcode::KXORD | - Opcode::KADDQ | - Opcode::KANDQ | - Opcode::KANDNQ | - Opcode::KNOTQ | - Opcode::KORQ | - Opcode::KSHIFTLQ | - Opcode::KSHIFTRQ | - Opcode::KXNORQ | - Opcode::KXORQ | - Opcode::IMUL => { write!(out, "{}", colors.arithmetic_op(self)) } - Opcode::POPF | - Opcode::PUSHF | - Opcode::ENTER | - Opcode::LEAVE | - Opcode::PUSHA | - Opcode::POPA | - 
Opcode::PUSH | - Opcode::POP => { write!(out, "{}", colors.stack_op(self)) } - Opcode::WAIT | - Opcode::FNOP | - Opcode::FDISI8087_NOP | - Opcode::FENI8087_NOP | - Opcode::FSETPM287_NOP | - Opcode::PREFETCHNTA | - Opcode::PREFETCH0 | - Opcode::PREFETCH1 | - Opcode::PREFETCH2 | - Opcode::PREFETCHW | - Opcode::NOP => { write!(out, "{}", colors.nop_op(self)) } - - /* Control flow */ - Opcode::HLT | - Opcode::INT | - Opcode::INTO | - Opcode::IRET | - Opcode::IRETD | - Opcode::IRETQ | - Opcode::RETF | - Opcode::RETURN => { write!(out, "{}", colors.stop_op(self)) } - Opcode::LOOPNZ | - Opcode::LOOPZ | - Opcode::LOOP | - Opcode::JCXZ | - Opcode::CALL | - Opcode::CALLF | - Opcode::JMP | - Opcode::JMPF | - Opcode::JO | - Opcode::JNO | - Opcode::JB | - Opcode::JNB | - Opcode::JZ | - Opcode::JNZ | - Opcode::JA | - Opcode::JNA | - Opcode::JS | - Opcode::JNS | - Opcode::JP | - Opcode::JNP | - Opcode::JL | - Opcode::JGE | - Opcode::JLE | - Opcode::JG => { write!(out, "{}", colors.control_flow_op(self)) } - - /* Data transfer */ - Opcode::PI2FW | - Opcode::PI2FD | - Opcode::PF2ID | - Opcode::PF2IW | - Opcode::VCVTDQ2PD | - Opcode::VCVTDQ2PS | - Opcode::VCVTPD2DQ | - Opcode::VCVTPD2PS | - Opcode::VCVTPH2PS | - Opcode::VCVTPS2DQ | - Opcode::VCVTPS2PD | - Opcode::VCVTPS2PH | - Opcode::VCVTTPD2DQ | - Opcode::VCVTTPS2DQ | - Opcode::VCVTSD2SI | - Opcode::VCVTSD2SS | - Opcode::VCVTSI2SD | - Opcode::VCVTSI2SS | - Opcode::VCVTSS2SD | - Opcode::VCVTSS2SI | - Opcode::VCVTTSD2SI | - Opcode::VCVTTSS2SI | - Opcode::VCVTPD2UDQ | - Opcode::VCVTTPD2UDQ | - Opcode::VCVTPS2UDQ | - Opcode::VCVTTPS2UDQ | - Opcode::VCVTQQ2PD | - Opcode::VCVTQQ2PS | - Opcode::VCVTSD2USI | - Opcode::VCVTTSD2USI | - Opcode::VCVTSS2USI | - Opcode::VCVTTSS2USI | - Opcode::VCVTUDQ2PD | - Opcode::VCVTUDQ2PS | - Opcode::VCVTUSI2USD | - Opcode::VCVTUSI2USS | - Opcode::VCVTTPD2QQ | - Opcode::VCVTPD2QQ | - Opcode::VCVTTPD2UQQ | - Opcode::VCVTPD2UQQ | - Opcode::VCVTTPS2QQ | - Opcode::VCVTPS2QQ | - Opcode::VCVTTPS2UQQ | - 
Opcode::VCVTPS2UQQ | - Opcode::VCVTUQQ2PD | - Opcode::VCVTUQQ2PS | - Opcode::VMOVDDUP | - Opcode::VPSHUFLW | - Opcode::VPSHUFHW | - Opcode::VBLENDMPD | - Opcode::VBLENDMPS | - Opcode::VPBLENDMD | - Opcode::VPBLENDMQ | - Opcode::VBLENDPD | - Opcode::VBLENDPS | - Opcode::VBLENDVPD | - Opcode::VBLENDVPS | - Opcode::VPBLENDMB | - Opcode::VPBLENDMW | - Opcode::PBLENDVB | - Opcode::PBLENDW | - Opcode::BLENDPD | - Opcode::BLENDPS | - Opcode::BLENDVPD | - Opcode::BLENDVPS | - Opcode::BLENDW | - Opcode::VBROADCASTF128 | - Opcode::VBROADCASTI128 | - Opcode::VBROADCASTSD | - Opcode::VBROADCASTSS | - Opcode::VPBROADCASTM | - Opcode::VEXTRACTF128 | - Opcode::VEXTRACTI128 | - Opcode::VEXTRACTPS | - Opcode::EXTRACTPS | - Opcode::VGATHERDPD | - Opcode::VGATHERDPS | - Opcode::VGATHERQPD | - Opcode::VGATHERQPS | - Opcode::VGATHERPF0DPD | - Opcode::VGATHERPF0DPS | - Opcode::VGATHERPF0QPD | - Opcode::VGATHERPF0QPS | - Opcode::VGATHERPF1DPD | - Opcode::VGATHERPF1DPS | - Opcode::VGATHERPF1QPD | - Opcode::VGATHERPF1QPS | - Opcode::VSCATTERDD | - Opcode::VSCATTERDQ | - Opcode::VSCATTERQD | - Opcode::VSCATTERQQ | - Opcode::VPSCATTERDD | - Opcode::VPSCATTERDQ | - Opcode::VPSCATTERQD | - Opcode::VPSCATTERQQ | - Opcode::VSCATTERPF0DPD | - Opcode::VSCATTERPF0DPS | - Opcode::VSCATTERPF0QPD | - Opcode::VSCATTERPF0QPS | - Opcode::VSCATTERPF1DPD | - Opcode::VSCATTERPF1DPS | - Opcode::VSCATTERPF1QPD | - Opcode::VSCATTERPF1QPS | - Opcode::VINSERTF128 | - Opcode::VINSERTI128 | - Opcode::VINSERTPS | - Opcode::INSERTPS | - Opcode::VEXTRACTF32X4 | - Opcode::VEXTRACTF64X2 | - Opcode::VEXTRACTF64X4 | - Opcode::VEXTRACTI32X4 | - Opcode::VEXTRACTI64X2 | - Opcode::VEXTRACTI64X4 | - Opcode::VINSERTF32X4 | - Opcode::VINSERTF64X2 | - Opcode::VINSERTF64X4 | - Opcode::VINSERTI64X2 | - Opcode::VINSERTI64X4 | - Opcode::VSHUFF32X4 | - Opcode::VSHUFF64X2 | - Opcode::VSHUFI32X4 | - Opcode::VSHUFI64X2 | - Opcode::VMASKMOVDQU | - Opcode::VMASKMOVPD | - Opcode::VMASKMOVPS | - Opcode::VMOVAPD | - Opcode::VMOVAPS | - 
Opcode::VMOVD | - Opcode::VMOVDQA | - Opcode::VMOVDQU | - Opcode::VMOVHLPS | - Opcode::VMOVHPD | - Opcode::VMOVHPS | - Opcode::VMOVLHPS | - Opcode::VMOVLPD | - Opcode::VMOVLPS | - Opcode::VMOVMSKPD | - Opcode::VMOVMSKPS | - Opcode::VMOVNTDQ | - Opcode::VMOVNTDQA | - Opcode::VMOVNTPD | - Opcode::VMOVNTPS | - Opcode::MOVDIR64B | - Opcode::MOVDIRI | - Opcode::MOVNTDQA | - Opcode::VMOVQ | - Opcode::VMOVSHDUP | - Opcode::VMOVSLDUP | - Opcode::VMOVUPD | - Opcode::VMOVUPS | - Opcode::VMOVSD | - Opcode::VMOVSS | - Opcode::VMOVDQA32 | - Opcode::VMOVDQA64 | - Opcode::VMOVDQU32 | - Opcode::VMOVDQU64 | - Opcode::VPMOVM2B | - Opcode::VPMOVM2W | - Opcode::VPMOVB2M | - Opcode::VPMOVW2M | - Opcode::VPMOVSWB | - Opcode::VPMOVUSWB | - Opcode::VPMOVSQB | - Opcode::VPMOVUSQB | - Opcode::VPMOVSQW | - Opcode::VPMOVUSQW | - Opcode::VPMOVSQD | - Opcode::VPMOVUSQD | - Opcode::VPMOVSDB | - Opcode::VPMOVUSDB | - Opcode::VPMOVSDW | - Opcode::VPMOVUSDW | - Opcode::VPMOVM2D | - Opcode::VPMOVM2Q | - Opcode::VPMOVB2D | - Opcode::VPMOVQ2M | - Opcode::VMOVDQU8 | - Opcode::VMOVDQU16 | - - Opcode::VPBLENDD | - Opcode::VPBLENDVB | - Opcode::VPBLENDW | - Opcode::VPBROADCASTB | - Opcode::VPBROADCASTD | - Opcode::VPBROADCASTQ | - Opcode::VPBROADCASTW | - Opcode::VPGATHERDD | - Opcode::VPGATHERDQ | - Opcode::VPGATHERQD | - Opcode::VPGATHERQQ | - Opcode::VPCLMULQDQ | - Opcode::VPMOVMSKB | - Opcode::VPMOVSXBD | - Opcode::VPMOVSXBQ | - Opcode::VPMOVSXBW | - Opcode::VPMOVSXDQ | - Opcode::VPMOVSXWD | - Opcode::VPMOVSXWQ | - Opcode::VPMOVZXBD | - Opcode::VPMOVZXBQ | - Opcode::VPMOVZXBW | - Opcode::VPMOVZXDQ | - Opcode::VPMOVZXWD | - Opcode::VPMOVZXWQ | - Opcode::PMOVSXBD | - Opcode::PMOVSXBQ | - Opcode::PMOVSXBW | - Opcode::PMOVSXDQ | - Opcode::PMOVSXWD | - Opcode::PMOVSXWQ | - Opcode::PMOVZXBD | - Opcode::PMOVZXBQ | - Opcode::PMOVZXBW | - Opcode::PMOVZXDQ | - Opcode::PMOVZXWD | - Opcode::PMOVZXWQ | - Opcode::KUNPCKBW | - Opcode::KUNPCKWD | - Opcode::KUNPCKDQ | - Opcode::VUNPCKHPD | - Opcode::VUNPCKHPS | - 
Opcode::VUNPCKLPD | - Opcode::VUNPCKLPS | - Opcode::VPUNPCKHBW | - Opcode::VPUNPCKHDQ | - Opcode::VPUNPCKHQDQ | - Opcode::VPUNPCKHWD | - Opcode::VPUNPCKLBW | - Opcode::VPUNPCKLDQ | - Opcode::VPUNPCKLQDQ | - Opcode::VPUNPCKLWD | - Opcode::VSHUFPD | - Opcode::VSHUFPS | - Opcode::VPACKSSDW | - Opcode::VPACKUSDW | - Opcode::PACKUSDW | - Opcode::VPACKSSWB | - Opcode::VPACKUSWB | - Opcode::VALIGND | - Opcode::VALIGNQ | - Opcode::VPALIGNR | - Opcode::PALIGNR | - Opcode::VPERM2F128 | - Opcode::VPERM2I128 | - Opcode::VPERMD | - Opcode::VPERMILPD | - Opcode::VPERMILPS | - Opcode::VPERMPD | - Opcode::VPERMPS | - Opcode::VPERMQ | - Opcode::VPERMI2D | - Opcode::VPERMI2Q | - Opcode::VPERMI2PD | - Opcode::VPERMI2PS | - Opcode::VPERMT2D | - Opcode::VPERMT2Q | - Opcode::VPERMT2PD | - Opcode::VPERMT2PS | - Opcode::VPERMI2B | - Opcode::VPERMI2W | - Opcode::VPERMW | - Opcode::VPEXTRB | - Opcode::VPEXTRD | - Opcode::VPEXTRQ | - Opcode::VPEXTRW | - Opcode::PEXTRB | - Opcode::PEXTRD | - Opcode::PEXTRQ | - Opcode::EXTRQ | - Opcode::PINSRB | - Opcode::PINSRD | - Opcode::PINSRQ | - Opcode::INSERTQ | - Opcode::VPINSRB | - Opcode::VPINSRD | - Opcode::VPINSRQ | - Opcode::VPINSRW | - Opcode::VPMASKMOVD | - Opcode::VPMASKMOVQ | - Opcode::VCOMPRESSPD | - Opcode::VCOMPRESSPS | - Opcode::VPCOMPRESSQ | - Opcode::VPCOMPRESSD | - Opcode::VEXPANDPD | - Opcode::VEXPANDPS | - Opcode::VPSHUFB | - Opcode::VPSHUFD | - Opcode::VPHMINPOSUW | - Opcode::PHMINPOSUW | - Opcode::VZEROUPPER | - Opcode::VZEROALL | - Opcode::VFIXUPIMMPD | - Opcode::VFIXUPIMMPS | - Opcode::VFIXUPIMMSD | - Opcode::VFIXUPIMMSS | - Opcode::VREDUCEPD | - Opcode::VREDUCEPS | - Opcode::VREDUCESD | - Opcode::VREDUCESS | - Opcode::VGETEXPPD | - Opcode::VGETEXPPS | - Opcode::VGETEXPSD | - Opcode::VGETEXPSS | - Opcode::VGETMANTPD | - Opcode::VGETMANTPS | - Opcode::VGETMANTSD | - Opcode::VGETMANTSS | - Opcode::VLDDQU | - Opcode::BSWAP | - Opcode::CVTDQ2PD | - Opcode::CVTDQ2PS | - Opcode::CVTPS2DQ | - Opcode::CVTPD2DQ | - Opcode::CVTPI2PS | - 
Opcode::CVTPI2PD | - Opcode::CVTPS2PD | - Opcode::CVTPD2PS | - Opcode::CVTPS2PI | - Opcode::CVTPD2PI | - Opcode::CVTSD2SI | - Opcode::CVTSD2SS | - Opcode::CVTSI2SD | - Opcode::CVTSI2SS | - Opcode::CVTSS2SD | - Opcode::CVTSS2SI | - Opcode::CVTTPD2DQ | - Opcode::CVTTPS2DQ | - Opcode::CVTTPS2PI | - Opcode::CVTTPD2PI | - Opcode::CVTTSD2SI | - Opcode::CVTTSS2SI | - Opcode::MASKMOVQ | - Opcode::MASKMOVDQU | - Opcode::MOVAPS | - Opcode::MOVAPD | - Opcode::MOVD | - Opcode::MOVHPS | - Opcode::MOVHPD | - Opcode::MOVHLPS | - Opcode::MOVLPS | - Opcode::MOVLPD | - Opcode::MOVLHPS | - Opcode::MOVMSKPS | - Opcode::MOVMSKPD | - Opcode::MOVNTI | - Opcode::MOVNTPS | - Opcode::MOVNTPD | - Opcode::MOVNTSS | - Opcode::MOVNTSD | - Opcode::MOVNTQ | - Opcode::MOVNTDQ | - Opcode::MOVSD | - Opcode::MOVSS | - Opcode::MOVUPD | - Opcode::PSHUFHW | - Opcode::PSHUFLW | - Opcode::PUNPCKHBW | - Opcode::PUNPCKHDQ | - Opcode::PUNPCKHWD | - Opcode::PUNPCKLBW | - Opcode::PUNPCKLDQ | - Opcode::PUNPCKLWD | - Opcode::PUNPCKLQDQ | - Opcode::PUNPCKHQDQ | - Opcode::PACKSSDW | - Opcode::PACKSSWB | - Opcode::PACKUSWB | - Opcode::UNPCKHPS | - Opcode::UNPCKHPD | - Opcode::UNPCKLPS | - Opcode::UNPCKLPD | - Opcode::SHUFPD | - Opcode::SHUFPS | - Opcode::PMOVMSKB | - Opcode::KMOVB | - Opcode::KMOVW | - Opcode::KMOVD | - Opcode::KMOVQ | - Opcode::BNDMOV | - Opcode::LDDQU | - Opcode::CMC | - Opcode::CLC | - Opcode::CLI | - Opcode::CLD | - Opcode::STC | - Opcode::STI | - Opcode::STD | - Opcode::CBW | - Opcode::CWDE | - Opcode::CDQE | - Opcode::CWD | - Opcode::CDQ | - Opcode::CQO | - Opcode::MOVDDUP | - Opcode::MOVSLDUP | - Opcode::MOVDQ2Q | - Opcode::MOVDQU | - Opcode::MOVDQA | - Opcode::MOVQ | - Opcode::MOVQ2DQ | - Opcode::MOVSHDUP | - Opcode::MOVUPS | - Opcode::PEXTRW | - Opcode::PINSRW | - Opcode::MOV | - Opcode::MOVBE | - Opcode::LODS | - Opcode::STOS | - Opcode::LAHF | - Opcode::SAHF | - Opcode::MOVS | - Opcode::INS | - Opcode::IN | - Opcode::OUTS | - Opcode::OUT | - Opcode::MOVZX | - Opcode::MOVSX | - 
Opcode::MOVSXD | - Opcode::FILD | - Opcode::FBLD | - Opcode::FBSTP | - Opcode::FIST | - Opcode::FISTP | - Opcode::FISTTP | - Opcode::FLD | - Opcode::FLD1 | - Opcode::FLDCW | - Opcode::FLDENV | - Opcode::FLDL2E | - Opcode::FLDL2T | - Opcode::FLDLG2 | - Opcode::FLDLN2 | - Opcode::FLDPI | - Opcode::FLDZ | - Opcode::FST | - Opcode::FSTP | - Opcode::FSTPNCE | - Opcode::FNSAVE | - Opcode::FNSTCW | - Opcode::FNSTENV | - Opcode::FNSTOR | - Opcode::FNSTSW | - Opcode::FRSTOR | - Opcode::FXCH | - Opcode::XCHG | - Opcode::XLAT | - Opcode::CMOVA | - Opcode::CMOVB | - Opcode::CMOVG | - Opcode::CMOVGE | - Opcode::CMOVL | - Opcode::CMOVLE | - Opcode::CMOVNA | - Opcode::CMOVNB | - Opcode::CMOVNO | - Opcode::CMOVNP | - Opcode::CMOVNS | - Opcode::CMOVNZ | - Opcode::CMOVO | - Opcode::CMOVP | - Opcode::CMOVS | - Opcode::CMOVZ | - Opcode::FCMOVB | - Opcode::FCMOVBE | - Opcode::FCMOVE | - Opcode::FCMOVNB | - Opcode::FCMOVNBE | - Opcode::FCMOVNE | - Opcode::FCMOVNU | - Opcode::FCMOVU | - Opcode::SALC | - Opcode::SETO | - Opcode::SETNO | - Opcode::SETB | - Opcode::SETAE | - Opcode::SETZ | - Opcode::SETNZ | - Opcode::SETBE | - Opcode::SETA | - Opcode::SETS | - Opcode::SETNS | - Opcode::SETP | - Opcode::SETNP | - Opcode::SETL | - Opcode::SETGE | - Opcode::SETLE | - Opcode::SETG => { write!(out, "{}", colors.data_op(self)) } - - Opcode::VCOMISD | - Opcode::VCOMISS | - Opcode::VUCOMISD | - Opcode::VUCOMISS | - Opcode::KORTESTB | - Opcode::KTESTB | - Opcode::KORTESTW | - Opcode::KTESTW | - Opcode::KORTESTD | - Opcode::KTESTD | - Opcode::KORTESTQ | - Opcode::KTESTQ | - Opcode::VPTESTNMD | - Opcode::VPTESTNMQ | - Opcode::VPTERNLOGD | - Opcode::VPTERNLOGQ | - Opcode::VPTESTMD | - Opcode::VPTESTMQ | - Opcode::VPTESTNMB | - Opcode::VPTESTNMW | - Opcode::VPTESTMB | - Opcode::VPTESTMW | - Opcode::VPCMPD | - Opcode::VPCMPUD | - Opcode::VPCMPQ | - Opcode::VPCMPUQ | - Opcode::VPCMPB | - Opcode::VPCMPUB | - Opcode::VPCMPW | - Opcode::VPCMPUW | - Opcode::VCMPPD | - Opcode::VCMPPS | - Opcode::VCMPSD | - 
Opcode::VCMPSS | - Opcode::VMAXPD | - Opcode::VMAXPS | - Opcode::VMAXSD | - Opcode::VMAXSS | - Opcode::VPMAXSQ | - Opcode::VPMAXUQ | - Opcode::VPMINSQ | - Opcode::VPMINUQ | - Opcode::VMINPD | - Opcode::VMINPS | - Opcode::VMINSD | - Opcode::VMINSS | - Opcode::VPCMPEQB | - Opcode::VPCMPEQD | - Opcode::VPCMPEQQ | - Opcode::VPCMPEQW | - Opcode::VPCMPGTB | - Opcode::VPCMPGTD | - Opcode::VPCMPGTQ | - Opcode::VPCMPGTW | - Opcode::VPCMPESTRI | - Opcode::VPCMPESTRM | - Opcode::VPCMPISTRI | - Opcode::VPCMPISTRM | - Opcode::VPMAXSB | - Opcode::VPMAXSD | - Opcode::VPMAXSW | - Opcode::VPMAXUB | - Opcode::VPMAXUW | - Opcode::VPMAXUD | - Opcode::VPMINSB | - Opcode::VPMINSW | - Opcode::VPMINSD | - Opcode::VPMINUB | - Opcode::VPMINUW | - Opcode::VPMINUD | - Opcode::VFPCLASSPD | - Opcode::VFPCLASSPS | - Opcode::VFPCLASSSD | - Opcode::VFPCLASSSS | - Opcode::VRANGEPD | - Opcode::VRANGEPS | - Opcode::VRANGESD | - Opcode::VRANGESS | - Opcode::VPCONFLICTD | - Opcode::VPCONFLICTQ | - Opcode::VPTEST | - Opcode::VTESTPD | - Opcode::VTESTPS | - Opcode::PCMPEQB | - Opcode::PCMPEQD | - Opcode::PCMPEQQ | - Opcode::PCMPEQW | - Opcode::PCMPESTRI | - Opcode::PCMPESTRM | - Opcode::PCMPGTB | - Opcode::PCMPGTD | - Opcode::PCMPGTQ | - Opcode::PCMPGTW | - Opcode::PCMPISTRI | - Opcode::PCMPISTRM | - Opcode::PTEST | - Opcode::MAXPD | - Opcode::MAXPS | - Opcode::MAXSD | - Opcode::MAXSS | - Opcode::MINPD | - Opcode::MINPS | - Opcode::MINSD | - Opcode::MINSS | - Opcode::PMAXSB | - Opcode::PMAXSD | - Opcode::PMAXSW | - Opcode::PMAXUB | - Opcode::PMAXUD | - Opcode::PMAXUW | - Opcode::PMINSB | - Opcode::PMINSD | - Opcode::PMINSW | - Opcode::PMINUB | - Opcode::PMINUD | - Opcode::PMINUW | - Opcode::PFCMPGE | - Opcode::PFMIN | - Opcode::PFCMPGT | - Opcode::PFMAX | - Opcode::PFCMPEQ | - Opcode::CMPS | - Opcode::SCAS | - Opcode::TEST | - Opcode::FTST | - Opcode::FXAM | - Opcode::FUCOM | - Opcode::FUCOMI | - Opcode::FUCOMIP | - Opcode::FUCOMP | - Opcode::FUCOMPP | - Opcode::FCOM | - Opcode::FCOMI | - Opcode::FCOMIP 
| - Opcode::FCOMP | - Opcode::FCOMPP | - Opcode::FICOM | - Opcode::FICOMP | - Opcode::CMPSD | - Opcode::CMPSS | - Opcode::CMP | - Opcode::CMPPS | - Opcode::CMPPD | - Opcode::CMPXCHG8B | - Opcode::CMPXCHG16B | - Opcode::CMPXCHG => { write!(out, "{}", colors.comparison_op(self)) } - - Opcode::WRMSR | - Opcode::RDMSR | - Opcode::RDTSC | - Opcode::RDPMC | - Opcode::RDPID | - Opcode::RDFSBASE | - Opcode::RDGSBASE | - Opcode::WRFSBASE | - Opcode::WRGSBASE | - Opcode::FXSAVE | - Opcode::FXRSTOR | - Opcode::LDMXCSR | - Opcode::STMXCSR | - Opcode::VLDMXCSR | - Opcode::VSTMXCSR | - Opcode::XSAVE | - Opcode::XSAVEC | - Opcode::XSAVES | - Opcode::XSAVEC64 | - Opcode::XSAVES64 | - Opcode::XRSTOR | - Opcode::XRSTORS | - Opcode::XRSTORS64 | - Opcode::XSAVEOPT | - Opcode::LFENCE | - Opcode::MFENCE | - Opcode::SFENCE | - Opcode::CLFLUSH | - Opcode::CLFLUSHOPT | - Opcode::CLWB | - Opcode::LDS | - Opcode::LES | - Opcode::SGDT | - Opcode::SIDT | - Opcode::LGDT | - Opcode::LIDT | - Opcode::SMSW | - Opcode::LMSW | - Opcode::SWAPGS | - Opcode::RDTSCP | - Opcode::INVEPT | - Opcode::INVVPID | - Opcode::INVPCID | - Opcode::INVLPG | - Opcode::INVLPGA | - Opcode::INVLPGB | - Opcode::TLBSYNC | - Opcode::PSMASH | - Opcode::PVALIDATE | - Opcode::RMPADJUST | - Opcode::RMPUPDATE | - Opcode::CPUID | - Opcode::WBINVD | - Opcode::INVD | - Opcode::SYSRET | - Opcode::CLTS | - Opcode::SYSCALL | - Opcode::TDCALL | - Opcode::SEAMRET | - Opcode::SEAMOPS | - Opcode::SEAMCALL | - Opcode::TPAUSE | - Opcode::UMONITOR | - Opcode::UMWAIT | - Opcode::LSL | - Opcode::SLDT | - Opcode::STR | - Opcode::LLDT | - Opcode::LTR | - Opcode::VERR | - Opcode::VERW | - Opcode::JMPE | - Opcode::EMMS | - Opcode::FEMMS | - Opcode::GETSEC | - Opcode::LFS | - Opcode::LGS | - Opcode::LSS | - Opcode::RSM | - Opcode::SYSENTER | - Opcode::SYSEXIT | - Opcode::VMREAD | - Opcode::VMWRITE | - Opcode::VMCLEAR | - Opcode::VMPTRLD | - Opcode::VMPTRST | - Opcode::VMXON | - Opcode::VMCALL | - Opcode::VMLAUNCH | - Opcode::VMRESUME | - 
Opcode::VMLOAD | - Opcode::VMMCALL | - Opcode::VMSAVE | - Opcode::VMRUN | - Opcode::VMXOFF | - Opcode::PCONFIG | - Opcode::MONITOR | - Opcode::MWAIT | - Opcode::MONITORX | - Opcode::MWAITX | - Opcode::SKINIT | - Opcode::CLGI | - Opcode::STGI | - Opcode::CLAC | - Opcode::STAC | - Opcode::ENCLS | - Opcode::ENCLV | - Opcode::XGETBV | - Opcode::XSETBV | - Opcode::VMFUNC | - Opcode::XEND | - Opcode::XTEST | - Opcode::XABORT | - Opcode::XBEGIN | - Opcode::ENCLU | - Opcode::RDPKRU | - Opcode::WRPKRU | - Opcode::RDPRU | - Opcode::CLZERO | - Opcode::ENQCMD | - Opcode::ENQCMDS | - Opcode::PTWRITE | - Opcode::UIRET | - Opcode::TESTUI | - Opcode::CLUI | - Opcode::STUI | - Opcode::SENDUIPI | - Opcode::XSUSLDTRK | - Opcode::XRESLDTRK | - Opcode::BOUND | - Opcode::ARPL | - Opcode::BNDMK | - Opcode::BNDCL | - Opcode::BNDCU | - Opcode::BNDCN | - Opcode::BNDLDX | - Opcode::BNDSTX | - Opcode::LAR => { write!(out, "{}", colors.platform_op(self)) } - - Opcode::CRC32 | - Opcode::RDSEED | - Opcode::RDRAND | - Opcode::SHA1RNDS4 | - Opcode::SHA1NEXTE | - Opcode::SHA1MSG1 | - Opcode::SHA1MSG2 | - Opcode::SHA256RNDS2 | - Opcode::SHA256MSG1 | - Opcode::SHA256MSG2 | - Opcode::FFREE | - Opcode::FFREEP | - Opcode::FDECSTP | - Opcode::FINCSTP | - Opcode::GF2P8MULB | - Opcode::GF2P8AFFINEQB | - Opcode::GF2P8AFFINEINVQB | - Opcode::AESDEC128KL | - Opcode::AESDEC256KL | - Opcode::AESDECWIDE128KL | - Opcode::AESDECWIDE256KL | - Opcode::AESENC128KL | - Opcode::AESENC256KL | - Opcode::AESENCWIDE128KL | - Opcode::AESENCWIDE256KL | - Opcode::ENCODEKEY128 | - Opcode::ENCODEKEY256 | - Opcode::LOADIWKEY | - Opcode::HRESET | - Opcode::WRUSS | - Opcode::WRSS | - Opcode::INCSSP | - Opcode::SAVEPREVSSP | - Opcode::SETSSBSY | - Opcode::CLRSSBSY | - Opcode::RSTORSSP | - Opcode::ENDBR64 | - Opcode::ENDBR32 | - Opcode::AESDEC | - Opcode::AESDECLAST | - Opcode::AESENC | - Opcode::AESENCLAST | - Opcode::AESIMC | - Opcode::AESKEYGENASSIST | - Opcode::VAESDEC | - Opcode::VAESDECLAST | - Opcode::VAESENC | - 
Opcode::VAESENCLAST | - Opcode::VAESIMC | - Opcode::VAESKEYGENASSIST => { write!(out, "{}", colors.misc_op(self)) } - - Opcode::UD0 | - Opcode::UD1 | - Opcode::UD2 | - Opcode::Invalid => { write!(out, "{}", colors.invalid_op(self)) } - } + fn colorize(&self, _colors: &Y, out: &mut T) -> fmt::Result { + // see `impl Colorize for long_mode::Opcode for more about this + out.write_str(self.name()) } } impl fmt::Display for Instruction { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + // to reuse one implementation, call the deprecated function for now. + #[allow(deprecated)] self.display_with(DisplayStyle::Intel).colorize(&NoColors, fmt) } } impl<'instr> fmt::Display for InstructionDisplayer<'instr> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + // to reuse one implementation, call the deprecated function for now. + #[allow(deprecated)] self.colorize(&NoColors, fmt) } } @@ -3623,6 +2200,8 @@ pub struct InstructionDisplayer<'instr> { * so write to some Write thing i guess. bite me. i really just want to * stop thinking about how to support printing instructions... */ +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] impl <'instr, T: fmt::Write, Y: YaxColors> Colorize for InstructionDisplayer<'instr> { fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result { // TODO: I DONT LIKE THIS, there is no address i can give contextualize here, @@ -3635,8 +2214,18 @@ impl <'instr, T: fmt::Write, Y: YaxColors> Colorize for InstructionDisplay struct NoContext; impl Instruction { + /// format this instruction into `out` as a plain text string. 
+ #[cfg_attr(feature="profiling", inline(never))] pub fn write_to(&self, out: &mut T) -> fmt::Result { - self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) + let mut out = yaxpeax_arch::display::FmtSink::new(out); + contextualize_intel(self, &mut out) + } + + /// format this instruction into `out`, which may perform additional styling based on its + /// `DisplaySink` implementation. + #[cfg_attr(feature="profiling", inline(never))] + pub fn display_into(&self, out: &mut T) -> fmt::Result { + contextualize_intel(self, out) } } @@ -4123,6 +2712,8 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) Ok(()) } +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual for InstructionDisplayer<'instr> { fn contextualize(&self, _colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { let InstructionDisplayer { @@ -4143,6 +2734,8 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual } } +// allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis +#[allow(deprecated)] #[cfg(feature="std")] impl ShowContextual], T, Y> for Instruction { fn contextualize(&self, colors: &Y, _address: u64, context: Option<&[Option]>, out: &mut T) -> fmt::Result { From 2002347272391dc6a70d83fe8293f2ce35ed26ee Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 15:15:50 -0700 Subject: [PATCH 75/95] add additional `call` test cases fix 32-bit 66-prefixed ff /2 call not having 16-bit operands fix momentary regression in rendering `call` instructions to string --- CHANGELOG | 5 +++++ src/long_mode/display.rs | 5 ++--- src/long_mode/mod.rs | 2 ++ src/protected_mode/display.rs | 5 ++--- src/protected_mode/mod.rs | 7 ++----- src/real_mode/display.rs | 5 ++--- test/long_mode/mod.rs | 11 +++++++++++ test/protected_mode/mod.rs | 13 ++++++++++++- test/real_mode/mod.rs | 
3 +++ 9 files changed, 41 insertions(+), 15 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index d420ed0..590f731 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,8 @@ +## 1.3.0 + +* fix 32-bit call/jmp not respecting 66 prefix if set - such cases use 16-bit + operands, but decoded as if they used 32-bit operands. + ## 1.2.2 * fix `hreset` reporting two operands, with a second operand of `Nothing`. diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index f765fb7..4f4e739 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -4245,9 +4245,8 @@ impl ShowContextual ShowContextual ShowContextual Date: Sun, 23 Jun 2024 15:18:04 -0700 Subject: [PATCH 76/95] stale file --- src/display.rs | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/display.rs diff --git a/src/display.rs b/src/display.rs deleted file mode 100644 index e69de29..0000000 From 9d9bb9b8fbc5a04f40b927093cd3ba8e562f941d Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 15:28:50 -0700 Subject: [PATCH 77/95] InstructionTextBuffer for all three modes, adjust fuzzer to match --- fuzz/fuzz_targets/display_does_not_panic.rs | 18 +++- src/protected_mode/display.rs | 104 ++++++++++++++++++++ src/protected_mode/mod.rs | 2 + src/real_mode/display.rs | 104 ++++++++++++++++++++ src/real_mode/mod.rs | 2 + 5 files changed, 227 insertions(+), 3 deletions(-) diff --git a/fuzz/fuzz_targets/display_does_not_panic.rs b/fuzz/fuzz_targets/display_does_not_panic.rs index 97a14b8..39f5753 100644 --- a/fuzz/fuzz_targets/display_does_not_panic.rs +++ b/fuzz/fuzz_targets/display_does_not_panic.rs @@ -8,14 +8,26 @@ fuzz_target!(|data: &[u8]| { let x86_16_decoder = yaxpeax_x86::real_mode::InstDecoder::default(); if let Ok(inst) = x86_64_decoder.decode_slice(data) { - inst.write_to(&mut String::new()).expect("format does not panic"); + let mut out = String::new(); + inst.write_to(&mut out).expect("format does not panic"); + let mut text_buf = 
yaxpeax_x86::long_mode::InstructionTextBuffer::new(); + text_buf.format_inst(&inst.display_with(yaxpeax_x86::long_mode::DisplayStyle::Intel)).expect("can format"); + assert_eq!(text_buf.text_str(), out); }; if let Ok(inst) = x86_32_decoder.decode_slice(data) { - inst.write_to(&mut String::new()).expect("format does not panic"); + let mut out = String::new(); + inst.write_to(&mut out).expect("format does not panic"); + let mut text_buf = yaxpeax_x86::protected_mode::InstructionTextBuffer::new(); + text_buf.format_inst(&inst.display_with(yaxpeax_x86::protected_mode::DisplayStyle::Intel)).expect("can format"); + assert_eq!(text_buf.text_str(), out); }; if let Ok(inst) = x86_16_decoder.decode_slice(data) { - inst.write_to(&mut String::new()).expect("format does not panic"); + let mut out = String::new(); + inst.write_to(&mut out).expect("format does not panic"); + let mut text_buf = yaxpeax_x86::real_mode::InstructionTextBuffer::new(); + text_buf.format_inst(&inst.display_with(yaxpeax_x86::real_mode::DisplayStyle::Intel)).expect("can format"); + assert_eq!(text_buf.text_str(), out); }; }); diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 1db34cb..0291246 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -2942,3 +2942,107 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> Ok(false) } } + +#[cfg(feature="alloc")] +mod buffer_sink { + use core::fmt; + use super::super::{DisplayStyle, InstructionDisplayer}; + use super::{contextualize_c, contextualize_intel}; + + /// helper to format `amd64` instructions with highest throughput and least configuration. this is + /// functionally a buffer for one x86 instruction's text. + /// + /// ### when to use this over `fmt::Display`? + /// + /// `fmt::Display` is a fair choice in most cases. in some cases, `InstructionTextBuffer` may + /// support formatting options that may be difficult to configure for a `Display` impl. 
+ /// additionally, `InstructionTextBuffer` may be able to specialize more effectively where + /// `fmt::Display`, writing to a generic `fmt::Write`, may not. + /// + /// if your use case for `yaxpeax-x86` involves being bounded on the speed of disassembling and + /// formatting instructions, [`InstructionTextBuffer::format_inst`] has been measured as up to 11% + /// faster than an equivalent `write!(buf, "{}", inst)`. + /// + /// `InstructionTextBuffer` involves internal allocations; if your use case for `yaxpeax-x86` + /// requires allocations never occurring, it is not an appropriate tool. + /// + /// ### example + /// + /// ``` + /// use yaxpeax_x86::long_mode::InstDecoder; + /// use yaxpeax_x86::long_mode::InstructionTextBuffer; + /// use yaxpeax_x86::long_mode::DisplayStyle; + /// + /// let bytes = &[0x33, 0xc0]; + /// let inst = InstDecoder::default().decode_slice(bytes).expect("can decode"); + /// let mut text_buf = InstructionTextBuffer::new(); + /// assert_eq!( + /// text_buf.format_inst(&inst.display_with(DisplayStyle::Intel)).expect("can format"), + /// "xor eax, eax" + /// ); + /// + /// // or, getting the formatted instruction with `text_str`: + /// assert_eq!( + /// text_buf.text_str(), + /// "xor eax, eax" + /// ); + /// ``` + pub struct InstructionTextBuffer { + content: alloc::string::String, + } + + impl InstructionTextBuffer { + /// create an `InstructionTextBuffer` with default settings. `InstructionTextBuffer`'s default + /// settings format instructions identically to their corresponding `fmt::Display`. + pub fn new() -> Self { + let mut buf = alloc::string::String::new(); + // TODO: move 512 out to a MAX_INSTRUCTION_LEN const and appropriate justification (and + // fuzzing and ..) + buf.reserve(512); + Self { + content: buf, + } + } + + /// format `inst` into this buffer. returns a borrow of that same internal buffer for convenience. 
+ /// + /// this clears and reuses an internal buffer; if an instruction had been previously formatted + /// through this buffer, it will be overwritten. + pub fn format_inst<'buf, 'instr>(&'buf mut self, display: &InstructionDisplayer<'instr>) -> Result<&'buf str, fmt::Error> { + // Safety: this sink is used to format exactly one instruction and then dropped. it can + // never escape `format_inst`. + let mut handle = unsafe { self.write_handle() }; + + match display.style { + DisplayStyle::Intel => { + contextualize_intel(&display.instr, &mut handle)?; + } + DisplayStyle::C => { + contextualize_c(&display.instr, &mut handle)?; + } + } + + Ok(self.text_str()) + } + + /// return a borrow of the internal buffer. if an instruction has been formatted, the + /// returned `&str` contains that instruction's buffered text. + pub fn text_str(&self) -> &str { + self.content.as_str() + } + + /// do the necessary bookkeeping and provide an `InstructionTextSink` to write an instruction + /// into. + /// + /// SAFETY: callers must print at most one instruction into this handle. + unsafe fn write_handle(&mut self) -> yaxpeax_arch::display::InstructionTextSink { + self.content.clear(); + // Safety: `content` was just cleared, so writing begins at the start of the buffer. + // `content`is large enough to hold a fully-formatted instruction (see + // `InstructionTextBuffer::new`). 
+ yaxpeax_arch::display::InstructionTextSink::new(&mut self.content) + } + } +} +#[cfg(feature="alloc")] +pub use buffer_sink::InstructionTextBuffer; diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 38f8310..6d9031a 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -8,6 +8,8 @@ pub use crate::MemoryAccessSize; #[cfg(feature = "fmt")] pub use self::display::{DisplayStyle, InstructionDisplayer}; +#[cfg(all(feature = "fmt", feature = "alloc"))] +pub use self::display::InstructionTextBuffer; use core::cmp::PartialEq; use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index 90b4f0a..fdd51cf 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -2942,3 +2942,107 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> Ok(false) } } + +#[cfg(feature="alloc")] +mod buffer_sink { + use core::fmt; + use super::super::{DisplayStyle, InstructionDisplayer}; + use super::{contextualize_c, contextualize_intel}; + + /// helper to format `amd64` instructions with highest throughput and least configuration. this is + /// functionally a buffer for one x86 instruction's text. + /// + /// ### when to use this over `fmt::Display`? + /// + /// `fmt::Display` is a fair choice in most cases. in some cases, `InstructionTextBuffer` may + /// support formatting options that may be difficult to configure for a `Display` impl. + /// additionally, `InstructionTextBuffer` may be able to specialize more effectively where + /// `fmt::Display`, writing to a generic `fmt::Write`, may not. + /// + /// if your use case for `yaxpeax-x86` involves being bounded on the speed of disassembling and + /// formatting instructions, [`InstructionTextBuffer::format_inst`] has been measured as up to 11% + /// faster than an equivalent `write!(buf, "{}", inst)`. 
+ /// + /// `InstructionTextBuffer` involves internal allocations; if your use case for `yaxpeax-x86` + /// requires allocations never occurring, it is not an appropriate tool. + /// + /// ### example + /// + /// ``` + /// use yaxpeax_x86::long_mode::InstDecoder; + /// use yaxpeax_x86::long_mode::InstructionTextBuffer; + /// use yaxpeax_x86::long_mode::DisplayStyle; + /// + /// let bytes = &[0x33, 0xc0]; + /// let inst = InstDecoder::default().decode_slice(bytes).expect("can decode"); + /// let mut text_buf = InstructionTextBuffer::new(); + /// assert_eq!( + /// text_buf.format_inst(&inst.display_with(DisplayStyle::Intel)).expect("can format"), + /// "xor eax, eax" + /// ); + /// + /// // or, getting the formatted instruction with `text_str`: + /// assert_eq!( + /// text_buf.text_str(), + /// "xor eax, eax" + /// ); + /// ``` + pub struct InstructionTextBuffer { + content: alloc::string::String, + } + + impl InstructionTextBuffer { + /// create an `InstructionTextBuffer` with default settings. `InstructionTextBuffer`'s default + /// settings format instructions identically to their corresponding `fmt::Display`. + pub fn new() -> Self { + let mut buf = alloc::string::String::new(); + // TODO: move 512 out to a MAX_INSTRUCTION_LEN const and appropriate justification (and + // fuzzing and ..) + buf.reserve(512); + Self { + content: buf, + } + } + + /// format `inst` into this buffer. returns a borrow of that same internal buffer for convenience. + /// + /// this clears and reuses an internal buffer; if an instruction had been previously formatted + /// through this buffer, it will be overwritten. + pub fn format_inst<'buf, 'instr>(&'buf mut self, display: &InstructionDisplayer<'instr>) -> Result<&'buf str, fmt::Error> { + // Safety: this sink is used to format exactly one instruction and then dropped. it can + // never escape `format_inst`. 
+ let mut handle = unsafe { self.write_handle() }; + + match display.style { + DisplayStyle::Intel => { + contextualize_intel(&display.instr, &mut handle)?; + } + DisplayStyle::C => { + contextualize_c(&display.instr, &mut handle)?; + } + } + + Ok(self.text_str()) + } + + /// return a borrow of the internal buffer. if an instruction has been formatted, the + /// returned `&str` contains that instruction's buffered text. + pub fn text_str(&self) -> &str { + self.content.as_str() + } + + /// do the necessary bookkeeping and provide an `InstructionTextSink` to write an instruction + /// into. + /// + /// SAFETY: callers must print at most one instruction into this handle. + unsafe fn write_handle(&mut self) -> yaxpeax_arch::display::InstructionTextSink { + self.content.clear(); + // Safety: `content` was just cleared, so writing begins at the start of the buffer. + // `content`is large enough to hold a fully-formatted instruction (see + // `InstructionTextBuffer::new`). + yaxpeax_arch::display::InstructionTextSink::new(&mut self.content) + } + } +} +#[cfg(feature="alloc")] +pub use buffer_sink::InstructionTextBuffer; diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index b1aaf7d..162b380 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -8,6 +8,8 @@ pub use crate::MemoryAccessSize; #[cfg(feature = "fmt")] pub use self::display::{DisplayStyle, InstructionDisplayer}; +#[cfg(all(feature = "fmt", feature = "alloc"))] +pub use self::display::InstructionTextBuffer; use core::cmp::PartialEq; use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; From 1fdd243276d7184bbc0acb0bda349283cf8cfbda Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 15:31:25 -0700 Subject: [PATCH 78/95] fuzz caught negation bug --- src/long_mode/display.rs | 14 +++++++------- src/protected_mode/display.rs | 14 +++++++------- src/real_mode/display.rs | 14 +++++++------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git 
a/src/long_mode/display.rs b/src/long_mode/display.rs index 4f4e739..37f832a 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -439,7 +439,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -472,7 +472,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -502,7 +502,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -517,7 +517,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -557,7 +557,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -588,7 +588,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -623,7 +623,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 0291246..bdc0e95 100644 --- 
a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -415,7 +415,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -448,7 +448,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -478,7 +478,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -493,7 +493,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -533,7 +533,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -564,7 +564,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -599,7 +599,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index fdd51cf..8de8168 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -415,7 +415,7 @@ impl 
super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -448,7 +448,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -478,7 +478,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -493,7 +493,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -533,7 +533,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -564,7 +564,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } @@ -599,7 +599,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = disp as u32; if disp < 0 { self.f.write_fixed_size("- 0x")?; - v = -disp as u32; + v = disp.unsigned_abs(); } else { self.f.write_fixed_size("+ 0x")?; } From 24d5384f610ae33439a73493e6ed3c842ef852a0 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 15:34:03 -0700 Subject: [PATCH 79/95] another fuzz bug --- src/long_mode/display.rs | 12 ++++++------ src/protected_mode/display.rs | 10 +++++----- src/real_mode/display.rs | 10 
+++++----- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 37f832a..907ee9c 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -296,7 +296,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = imm as u8; if imm < 0 { self.f.write_char('-')?; - v = -imm as u8; + v = imm.unsigned_abs(); } self.f.write_fixed_size("0x")?; self.f.write_u8(v)?; @@ -317,7 +317,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = imm as u16; if imm < 0 { self.f.write_char('-')?; - v = -imm as u16; + v = imm.unsigned_abs(); } self.f.write_fixed_size("0x")?; self.f.write_u16(v)?; @@ -337,7 +337,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = imm as u32; if imm < 0 { self.f.write_char('-')?; - v = -imm as u32; + v = imm.unsigned_abs(); } self.f.write_fixed_size("0x")?; self.f.write_u32(v)?; @@ -358,7 +358,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = imm as u64; if imm < 0 { self.f.write_char('-')?; - v = -imm as u64; + v = imm.unsigned_abs(); } self.f.write_fixed_size("0x")?; self.f.write_u64(v)?; @@ -4287,7 +4287,7 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> if rel < 0 { self.out.write_char('-')?; //danger_anguished_string_write(&mut self.out, "-"); - v = -rel as u8; + v = rel.unsigned_abs(); } else { self.out.write_char('+')?; // danger_anguished_string_write(&mut self.out, "+"); @@ -4308,7 +4308,7 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> if rel < 0 { self.out.write_char('-')?; // danger_anguished_string_write(&mut self.out, "-"); - v = -rel as u32; + v = rel.unsigned_abs(); } else { self.out.write_char('+')?; // danger_anguished_string_write(&mut self.out, "+"); diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index bdc0e95..349e5c1 100644 --- 
a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -293,7 +293,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = imm as u8; if imm < 0 { self.f.write_char('-')?; - v = -imm as u8; + v = imm.unsigned_abs(); } self.f.write_fixed_size("0x")?; self.f.write_u8(v)?; @@ -314,7 +314,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = imm as u16; if imm < 0 { self.f.write_char('-')?; - v = -imm as u16; + v = imm.unsigned_abs(); } self.f.write_fixed_size("0x")?; self.f.write_u16(v)?; @@ -334,7 +334,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = imm as u32; if imm < 0 { self.f.write_char('-')?; - v = -imm as u32; + v = imm.unsigned_abs(); } self.f.write_fixed_size("0x")?; self.f.write_u32(v)?; @@ -2839,7 +2839,7 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> if rel < 0 { self.out.write_char('-')?; //danger_anguished_string_write(&mut self.out, "-"); - v = -rel as u8; + v = rel.unsigned_abs(); } else { self.out.write_char('+')?; // danger_anguished_string_write(&mut self.out, "+"); @@ -2860,7 +2860,7 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> if rel < 0 { self.out.write_char('-')?; // danger_anguished_string_write(&mut self.out, "-"); - v = -rel as u32; + v = rel.unsigned_abs(); } else { self.out.write_char('+')?; // danger_anguished_string_write(&mut self.out, "+"); diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index 8de8168..fe68830 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -293,7 +293,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = imm as u8; if imm < 0 { self.f.write_char('-')?; - v = -imm as u8; + v = imm.unsigned_abs(); } self.f.write_fixed_size("0x")?; self.f.write_u8(v)?; @@ -314,7 +314,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = imm as u16; if imm < 0 { 
self.f.write_char('-')?; - v = -imm as u16; + v = imm.unsigned_abs(); } self.f.write_fixed_size("0x")?; self.f.write_u16(v)?; @@ -334,7 +334,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> let mut v = imm as u32; if imm < 0 { self.f.write_char('-')?; - v = -imm as u32; + v = imm.unsigned_abs(); } self.f.write_fixed_size("0x")?; self.f.write_u32(v)?; @@ -2839,7 +2839,7 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> if rel < 0 { self.out.write_char('-')?; //danger_anguished_string_write(&mut self.out, "-"); - v = -rel as u8; + v = rel.unsigned_abs(); } else { self.out.write_char('+')?; // danger_anguished_string_write(&mut self.out, "+"); @@ -2860,7 +2860,7 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> if rel < 0 { self.out.write_char('-')?; // danger_anguished_string_write(&mut self.out, "-"); - v = -rel as u32; + v = rel.unsigned_abs(); } else { self.out.write_char('+')?; // danger_anguished_string_write(&mut self.out, "+"); From bc4abf8d43d39b16dcc4a37b407c37f97f156568 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 15:35:04 -0700 Subject: [PATCH 80/95] last vestiges of initial perf experiments --- src/long_mode/display.rs | 6 ------ src/protected_mode/display.rs | 6 ------ src/real_mode/display.rs | 6 ------ 3 files changed, 18 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 907ee9c..147517a 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -4282,15 +4282,12 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_i8(&mut self, rel: i8) -> Result { if RELATIVE_BRANCHES.contains(&self.inst.opcode) { self.out.write_char('$')?; - // danger_anguished_string_write(self.out, "$"); let mut v = rel as u8; if rel < 0 { self.out.write_char('-')?; - //danger_anguished_string_write(&mut self.out, "-"); v = rel.unsigned_abs(); } else { self.out.write_char('+')?; - // 
danger_anguished_string_write(&mut self.out, "+"); } self.out.write_fixed_size("0x")?; self.out.write_u8(v)?; @@ -4303,15 +4300,12 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_i32(&mut self, rel: i32) -> Result { if RELATIVE_BRANCHES.contains(&self.inst.opcode) || self.inst.opcode == Opcode::XBEGIN { self.out.write_char('$')?; - // danger_anguished_string_write(self.out, "$"); let mut v = rel as u32; if rel < 0 { self.out.write_char('-')?; - // danger_anguished_string_write(&mut self.out, "-"); v = rel.unsigned_abs(); } else { self.out.write_char('+')?; - // danger_anguished_string_write(&mut self.out, "+"); } self.out.write_fixed_size("0x")?; self.out.write_u32(v)?; diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 349e5c1..6c63f46 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -2834,15 +2834,12 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_i8(&mut self, rel: i8) -> Result { if RELATIVE_BRANCHES.contains(&self.inst.opcode) { self.out.write_char('$')?; - // danger_anguished_string_write(self.out, "$"); let mut v = rel as u8; if rel < 0 { self.out.write_char('-')?; - //danger_anguished_string_write(&mut self.out, "-"); v = rel.unsigned_abs(); } else { self.out.write_char('+')?; - // danger_anguished_string_write(&mut self.out, "+"); } self.out.write_fixed_size("0x")?; self.out.write_u8(v)?; @@ -2855,15 +2852,12 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_i32(&mut self, rel: i32) -> Result { if RELATIVE_BRANCHES.contains(&self.inst.opcode) || self.inst.opcode == Opcode::XBEGIN { self.out.write_char('$')?; - // danger_anguished_string_write(self.out, "$"); let mut v = rel as u32; if rel < 0 { self.out.write_char('-')?; - // danger_anguished_string_write(&mut self.out, "-"); v = rel.unsigned_abs(); } else { self.out.write_char('+')?; - // 
danger_anguished_string_write(&mut self.out, "+"); } self.out.write_fixed_size("0x")?; self.out.write_u32(v)?; diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index fe68830..27353b3 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -2834,15 +2834,12 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_i8(&mut self, rel: i8) -> Result { if RELATIVE_BRANCHES.contains(&self.inst.opcode) { self.out.write_char('$')?; - // danger_anguished_string_write(self.out, "$"); let mut v = rel as u8; if rel < 0 { self.out.write_char('-')?; - //danger_anguished_string_write(&mut self.out, "-"); v = rel.unsigned_abs(); } else { self.out.write_char('+')?; - // danger_anguished_string_write(&mut self.out, "+"); } self.out.write_fixed_size("0x")?; self.out.write_u8(v)?; @@ -2855,15 +2852,12 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_i32(&mut self, rel: i32) -> Result { if RELATIVE_BRANCHES.contains(&self.inst.opcode) || self.inst.opcode == Opcode::XBEGIN { self.out.write_char('$')?; - // danger_anguished_string_write(self.out, "$"); let mut v = rel as u32; if rel < 0 { self.out.write_char('-')?; - // danger_anguished_string_write(&mut self.out, "-"); v = rel.unsigned_abs(); } else { self.out.write_char('+')?; - // danger_anguished_string_write(&mut self.out, "+"); } self.out.write_fixed_size("0x")?; self.out.write_u32(v)?; From 0a5e9484d091ee9093d98d961896b81b52ea6e42 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 15:40:43 -0700 Subject: [PATCH 81/95] cfg_attr wants feature, not features plural --- src/long_mode/mod.rs | 2 +- src/protected_mode/mod.rs | 2 +- src/real_mode/mod.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 3c25506..114ee6d 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -4429,7 +4429,7 @@ impl Instruction { /// if the work you expect to do 
per-operand is very small, constructing an `Operand` and /// dispatching on tags may be a substantial factor of overall runtime. `visit_operand` can /// reduce total overhead in such cases. - #[cfg_attr(features="profiling", inline(never))] + #[cfg_attr(feature="profiling", inline(never))] fn visit_operand(&self, i: u8, visitor: &mut T) -> Result { assert!(i < 4); let spec = self.operands[i as usize]; diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 6d9031a..ff96a03 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -4343,7 +4343,7 @@ impl Instruction { /// if the work you expect to do per-operand is very small, constructing an `Operand` and /// dispatching on tags may be a substantial factor of overall runtime. `visit_operand` can /// reduce total overhead in such cases. - #[cfg_attr(features="profiling", inline(never))] + #[cfg_attr(feature="profiling", inline(never))] fn visit_operand(&self, i: u8, visitor: &mut T) -> Result { assert!(i < 4); let spec = self.operands[i as usize]; diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index 162b380..018b5f6 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -4343,7 +4343,7 @@ impl Instruction { /// if the work you expect to do per-operand is very small, constructing an `Operand` and /// dispatching on tags may be a substantial factor of overall runtime. `visit_operand` can /// reduce total overhead in such cases. 
- #[cfg_attr(features="profiling", inline(never))] + #[cfg_attr(feature="profiling", inline(never))] fn visit_operand(&self, i: u8, visitor: &mut T) -> Result { assert!(i < 4); let spec = self.operands[i as usize]; From 09dcfca94240b6c18fbaa1186781dac0d436e500 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 15:41:40 -0700 Subject: [PATCH 82/95] remove yaxpeax-x86 safer_unchecked.rs, it is now in yaxpeax-arch --- src/lib.rs | 2 -- src/long_mode/mod.rs | 2 +- src/protected_mode/mod.rs | 2 +- src/real_mode/mod.rs | 2 +- src/safer_unchecked.rs | 30 ------------------------------ 5 files changed, 3 insertions(+), 35 deletions(-) delete mode 100644 src/safer_unchecked.rs diff --git a/src/lib.rs b/src/lib.rs index a7b8531..836a50c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -138,8 +138,6 @@ pub use protected_mode::Arch as x86_32; pub mod real_mode; pub use real_mode::Arch as x86_16; -mod safer_unchecked; - const MEM_SIZE_STRINGS: [&'static str; 65] = [ "BUG", "byte", "word", "BUG", "dword", "ptr", "far", "BUG", "qword", diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 114ee6d..5a3dc51 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -12,11 +12,11 @@ pub use self::display::{DisplayStyle, InstructionDisplayer}; pub use self::display::InstructionTextBuffer; use core::cmp::PartialEq; -use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; use yaxpeax_arch::{AddressDiff, Decoder, Reader, LengthedInstruction}; use yaxpeax_arch::annotation::{AnnotatingDecoder, DescriptionSink, NullSink}; use yaxpeax_arch::{DecodeError as ArchDecodeError}; +use yaxpeax_arch::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; use core::fmt; impl fmt::Display for DecodeError { diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index ff96a03..b02079e 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -12,11 +12,11 @@ pub use self::display::{DisplayStyle, 
InstructionDisplayer}; pub use self::display::InstructionTextBuffer; use core::cmp::PartialEq; -use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; use yaxpeax_arch::{AddressDiff, Decoder, Reader, LengthedInstruction}; use yaxpeax_arch::annotation::{AnnotatingDecoder, DescriptionSink, NullSink}; use yaxpeax_arch::{DecodeError as ArchDecodeError}; +use yaxpeax_arch::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; use core::fmt; impl fmt::Display for DecodeError { diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index 018b5f6..e34444e 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -12,11 +12,11 @@ pub use self::display::{DisplayStyle, InstructionDisplayer}; pub use self::display::InstructionTextBuffer; use core::cmp::PartialEq; -use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; use yaxpeax_arch::{AddressDiff, Decoder, Reader, LengthedInstruction}; use yaxpeax_arch::annotation::{AnnotatingDecoder, DescriptionSink, NullSink}; use yaxpeax_arch::{DecodeError as ArchDecodeError}; +use yaxpeax_arch::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; use core::fmt; impl fmt::Display for DecodeError { diff --git a/src/safer_unchecked.rs b/src/safer_unchecked.rs deleted file mode 100644 index 34216bc..0000000 --- a/src/safer_unchecked.rs +++ /dev/null @@ -1,30 +0,0 @@ -use core::slice::SliceIndex; - -pub trait GetSaferUnchecked { - unsafe fn get_kinda_unchecked(&self, index: I) -> &>::Output - where - I: SliceIndex<[T]>; -} - -impl GetSaferUnchecked for [T] { - #[inline(always)] - unsafe fn get_kinda_unchecked(&self, index: I) -> &>::Output - where - I: SliceIndex<[T]>, - { - if cfg!(debug_assertions) { - &self[index] - } else { - self.get_unchecked(index) - } - } -} - -#[inline(always)] -pub unsafe fn unreachable_kinda_unchecked() -> ! 
{ - if cfg!(debug_assertions) { - panic!("UB: Unreachable unchecked was executed") - } else { - core::hint::unreachable_unchecked() - } -} From 25b9a530b3b19b9dcb27b9596df51f43bb272bd8 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 22:22:31 -0700 Subject: [PATCH 83/95] fix several sources of dead code warnings in various crate configs --- build.rs | 2 +- src/long_mode/display.rs | 6 +++--- src/long_mode/mod.rs | 9 ++++++--- src/protected_mode/display.rs | 6 +++--- src/protected_mode/mod.rs | 9 ++++++--- src/real_mode/display.rs | 6 +++--- src/real_mode/mod.rs | 9 ++++++--- 7 files changed, 28 insertions(+), 19 deletions(-) diff --git a/build.rs b/build.rs index a941375..116442e 100644 --- a/build.rs +++ b/build.rs @@ -1,5 +1,5 @@ fn main() { - #[cfg(capstone_bench)] + #[cfg(feature="capstone_bench")] { println!("cargo:rustc-link-search=/usr/lib/"); println!("cargo:rustc-link-lib=capstone"); diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 147517a..6cf2def 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -5,7 +5,7 @@ use core::fmt; use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; use crate::MEM_SIZE_STRINGS; -use crate::long_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixRex, OperandSpec}; +use crate::long_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixRex}; use yaxpeax_arch::display::DisplaySink; use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; @@ -4204,7 +4204,7 @@ impl ShowContextual { write!(out, " {}", s)?; }, None => { match self.operands[0] { - OperandSpec::Nothing => { + super::OperandSpec::Nothing => { return Ok(()); }, _ => { @@ -4227,7 +4227,7 @@ impl ShowContextual { write!(out, ", {}", s)? 
} None => { match &self.operands[i] { - &OperandSpec::Nothing => { + &super::OperandSpec::Nothing => { return Ok(()); }, _ => { diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 5a3dc51..5ed35e3 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -807,7 +807,8 @@ impl Operand { /// provided for parity with [`Instruction::visit_operand`]. this has little utility other than /// to reuse an `OperandVisitor` on an `Operand` directly. - pub fn visit(&self, visitor: &mut T) -> Result { + #[allow(dead_code)] // in some configurations this is unused, but it is internal-only for now, so it would warn. + fn visit(&self, visitor: &mut T) -> Result { match self { Operand::Nothing => { visitor.visit_other() @@ -4395,6 +4396,7 @@ impl Opcode { } #[inline(always)] + #[allow(dead_code)] // in some configurations this is unused, but it is internal-only for now, so it would warn. fn can_rep(&self) -> bool { (*self as u32) & 0x2000 != 0 } @@ -4420,7 +4422,8 @@ impl Instruction { Operand::from_spec(self, self.operands[i as usize]) } - /// TODO: make public, document, etc... + // TODO: make public when this seems stable and worthwhile. currently only used for display + // and Displaysink etc.. /// /// `visit_operand` allows code using operands to better specialize and inline with the logic /// that would construct an [`Operand`] variant, without having to necessarily construct an @@ -4430,8 +4433,8 @@ impl Instruction { /// dispatching on tags may be a substantial factor of overall runtime. `visit_operand` can /// reduce total overhead in such cases. #[cfg_attr(feature="profiling", inline(never))] + #[allow(dead_code)] // in some configurations this is unused, but it is internal-only for now, so it would warn. 
fn visit_operand(&self, i: u8, visitor: &mut T) -> Result { - assert!(i < 4); let spec = self.operands[i as usize]; match spec { OperandSpec::Nothing => { diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 6c63f46..dc31dbf 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -5,7 +5,7 @@ use core::fmt; use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; use crate::MEM_SIZE_STRINGS; -use crate::protected_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixVex, OperandSpec}; +use crate::protected_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixVex}; use yaxpeax_arch::display::DisplaySink; use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; @@ -2758,7 +2758,7 @@ impl ShowContextual { write!(out, " {}", s)?; }, None => { match self.operands[0] { - OperandSpec::Nothing => { + super::OperandSpec::Nothing => { return Ok(()); }, _ => { @@ -2778,7 +2778,7 @@ impl ShowContextual { write!(out, ", {}", s)? } None => { match &self.operands[i] { - &OperandSpec::Nothing => { + &super::OperandSpec::Nothing => { return Ok(()); }, _ => { diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index b02079e..956b5e3 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -741,7 +741,8 @@ impl Operand { /// provided for parity with [`Instruction::visit_operand`]. this has little utility other than /// to reuse an `OperandVisitor` on an `Operand` directly. - pub fn visit(&self, visitor: &mut T) -> Result { + #[allow(dead_code)] // in some configurations this is unused, but it is internal-only for now, so it would warn. + fn visit(&self, visitor: &mut T) -> Result { match self { Operand::Nothing => { visitor.visit_other() @@ -4309,6 +4310,7 @@ impl Opcode { } #[inline(always)] + #[allow(dead_code)] // in some configurations this is unused, but it is internal-only for now, so it would warn. 
fn can_rep(&self) -> bool { (*self as u32) & 0x2000 != 0 } @@ -4334,7 +4336,8 @@ impl Instruction { Operand::from_spec(self, self.operands[i as usize]) } - /// TODO: make public, document, etc... + // TODO: make public when this seems stable and worthwhile. currently only used for display + // and Displaysink etc.. /// /// `visit_operand` allows code using operands to better specialize and inline with the logic /// that would construct an [`Operand`] variant, without having to necessarily construct an @@ -4344,8 +4347,8 @@ impl Instruction { /// dispatching on tags may be a substantial factor of overall runtime. `visit_operand` can /// reduce total overhead in such cases. #[cfg_attr(feature="profiling", inline(never))] + #[allow(dead_code)] // in some configurations this is unused, but it is internal-only for now, so it would warn. fn visit_operand(&self, i: u8, visitor: &mut T) -> Result { - assert!(i < 4); let spec = self.operands[i as usize]; match spec { OperandSpec::Nothing => { diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index 27353b3..38f95bb 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -5,7 +5,7 @@ use core::fmt; use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; use crate::MEM_SIZE_STRINGS; -use crate::real_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixVex, OperandSpec}; +use crate::real_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixVex}; use yaxpeax_arch::display::DisplaySink; use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; @@ -2758,7 +2758,7 @@ impl ShowContextual { write!(out, " {}", s)?; }, None => { match self.operands[0] { - OperandSpec::Nothing => { + super::OperandSpec::Nothing => { return Ok(()); }, _ => { @@ -2778,7 +2778,7 @@ impl ShowContextual { write!(out, ", {}", s)? 
} None => { match &self.operands[i] { - &OperandSpec::Nothing => { + &super::OperandSpec::Nothing => { return Ok(()); }, _ => { diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index e34444e..1ea89cd 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -741,7 +741,8 @@ impl Operand { /// provided for parity with [`Instruction::visit_operand`]. this has little utility other than /// to reuse an `OperandVisitor` on an `Operand` directly. - pub fn visit(&self, visitor: &mut T) -> Result { + #[allow(dead_code)] // in some configurations this is unused, but it is internal-only for now, so it would warn. + fn visit(&self, visitor: &mut T) -> Result { match self { Operand::Nothing => { visitor.visit_other() @@ -4309,6 +4310,7 @@ impl Opcode { } #[inline(always)] + #[allow(dead_code)] // in some configurations this is unused, but it is internal-only for now, so it would warn. fn can_rep(&self) -> bool { (*self as u32) & 0x2000 != 0 } @@ -4334,7 +4336,8 @@ impl Instruction { Operand::from_spec(self, self.operands[i as usize]) } - /// TODO: make public, document, etc... + // TODO: make public when this seems stable and worthwhile. currently only used for display + // and Displaysink etc.. /// /// `visit_operand` allows code using operands to better specialize and inline with the logic /// that would construct an [`Operand`] variant, without having to necessarily construct an @@ -4344,8 +4347,8 @@ impl Instruction { /// dispatching on tags may be a substantial factor of overall runtime. `visit_operand` can /// reduce total overhead in such cases. #[cfg_attr(feature="profiling", inline(never))] + #[allow(dead_code)] // in some configurations this is unused, but it is internal-only for now, so it would warn. 
fn visit_operand(&self, i: u8, visitor: &mut T) -> Result { - assert!(i < 4); let spec = self.operands[i as usize]; match spec { OperandSpec::Nothing => { From 577b8e89849db33427e4be961997ad7af5e048f1 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 23:10:32 -0700 Subject: [PATCH 84/95] nightly correctly remarked that == on fat pointers is ambiguous --- src/shared/evex.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shared/evex.in b/src/shared/evex.in index a43a97e..0aa7d95 100644 --- a/src/shared/evex.in +++ b/src/shared/evex.in @@ -173,7 +173,7 @@ pub(crate) fn read_evex< let opc = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; let table_idx = ((m << 2) | p) as usize; let table = generated::TABLES[table_idx]; - if table as *const [_] == &generated::DUMMY[..] as *const [_] { + if core::ptr::eq(table as *const [_], &generated::DUMMY[..] as *const [_]) { return Err(DecodeError::InvalidOpcode); } let mut index_lower = 0; From 238d65c98b2983f87c89f78a793ba7c56dcf7b01 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 23:13:03 -0700 Subject: [PATCH 85/95] update yaxpeax-arch to 0.3.1, fix fuzz target warnings --- Cargo.toml | 2 +- ffi/long_mode/Cargo.toml | 2 +- ffi/multiarch/Cargo.toml | 2 +- ffi/protected_mode/Cargo.toml | 2 +- ffi/real_mode/Cargo.toml | 2 +- fuzz/Cargo.toml | 8 +++++++ fuzz/fuzz_targets/decode_does_not_panic.rs | 6 ++--- .../displaysink_used_correctly.rs | 24 +++++++++++++++++++ ...mall_reg_is_always_old_bank_if_possible.rs | 4 ++-- 9 files changed, 42 insertions(+), 10 deletions(-) create mode 100644 fuzz/fuzz_targets/displaysink_used_correctly.rs diff --git a/Cargo.toml b/Cargo.toml index ecc56be..907a70b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ readme = "README.md" edition = "2018" [dependencies] -yaxpeax-arch = { version = "0.2.7", default-features = false, features = [] } +yaxpeax-arch = { version = "0.3.1", default-features = false, features = [] } "num-traits" = { 
version = "0.2", default-features = false } "serde" = { version = "1.0", optional = true } "serde_json" = { version = "1.0", optional = true } diff --git a/ffi/long_mode/Cargo.toml b/ffi/long_mode/Cargo.toml index 2df8450..0b6a529 100644 --- a/ffi/long_mode/Cargo.toml +++ b/ffi/long_mode/Cargo.toml @@ -6,7 +6,7 @@ edition = "2018" [dependencies] yaxpeax-x86 = { path = "../../", default-features = false } -yaxpeax-arch = { version = "0.2.7", default-features = false } +yaxpeax-arch = { version = "0.3.1", default-features = false } [lib] name = "yaxpeax_x86_ffi_long_mode" diff --git a/ffi/multiarch/Cargo.toml b/ffi/multiarch/Cargo.toml index c15efbb..1e2cd2d 100644 --- a/ffi/multiarch/Cargo.toml +++ b/ffi/multiarch/Cargo.toml @@ -6,7 +6,7 @@ edition = "2018" [dependencies] yaxpeax-x86 = { path = "../../", default-features = false } -yaxpeax-arch = { version = "0.2.7", default-features = false } +yaxpeax-arch = { version = "0.3.1", default-features = false } [lib] name = "yaxpeax_x86_ffi_multiarch" diff --git a/ffi/protected_mode/Cargo.toml b/ffi/protected_mode/Cargo.toml index 711845e..ef99a39 100644 --- a/ffi/protected_mode/Cargo.toml +++ b/ffi/protected_mode/Cargo.toml @@ -6,7 +6,7 @@ edition = "2018" [dependencies] yaxpeax-x86 = { path = "../../", default-features = false } -yaxpeax-arch = { version = "0.2.7", default-features = false } +yaxpeax-arch = { version = "0.3.1", default-features = false } [lib] name = "yaxpeax_x86_ffi_protected_mode" diff --git a/ffi/real_mode/Cargo.toml b/ffi/real_mode/Cargo.toml index 689472e..c079e0e 100644 --- a/ffi/real_mode/Cargo.toml +++ b/ffi/real_mode/Cargo.toml @@ -6,7 +6,7 @@ edition = "2018" [dependencies] yaxpeax-x86 = { path = "../../", default-features = false } -yaxpeax-arch = { version = "0.2.7", default-features = false } +yaxpeax-arch = { version = "0.3.1", default-features = false } [lib] name = "yaxpeax_x86_ffi_real_mode" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 60690f6..2203dc3 100644 --- 
a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -10,6 +10,8 @@ cargo-fuzz = true [dependencies.yaxpeax-x86] path = ".." +[dependencies.yaxpeax-arch] +version = "0.3.1" [dependencies.libfuzzer-sys] git = "https://github.com/rust-fuzz/libfuzzer-sys.git" @@ -27,6 +29,12 @@ path = "fuzz_targets/display_does_not_panic.rs" test = false doc = false +[[bin]] +name = "displaysink_used_correctly" +path = "fuzz_targets/displaysink_used_correctly.rs" +test = false +doc = false + [[bin]] name = "display_c_does_not_panic" path = "fuzz_targets/display_c_does_not_panic.rs" diff --git a/fuzz/fuzz_targets/decode_does_not_panic.rs b/fuzz/fuzz_targets/decode_does_not_panic.rs index 5e6c15d..fd6efec 100644 --- a/fuzz/fuzz_targets/decode_does_not_panic.rs +++ b/fuzz/fuzz_targets/decode_does_not_panic.rs @@ -6,7 +6,7 @@ fuzz_target!(|data: &[u8]| { let x86_64_decoder = yaxpeax_x86::long_mode::InstDecoder::default(); let x86_32_decoder = yaxpeax_x86::protected_mode::InstDecoder::default(); let x86_16_decoder = yaxpeax_x86::real_mode::InstDecoder::default(); - drop(x86_64_decoder.decode_slice(data)); - drop(x86_32_decoder.decode_slice(data)); - drop(x86_16_decoder.decode_slice(data)); + x86_64_decoder.decode_slice(data).expect("is ok"); + x86_32_decoder.decode_slice(data).expect("is ok"); + x86_16_decoder.decode_slice(data).expect("is ok"); }); diff --git a/fuzz/fuzz_targets/displaysink_used_correctly.rs b/fuzz/fuzz_targets/displaysink_used_correctly.rs new file mode 100644 index 0000000..bd8d1db --- /dev/null +++ b/fuzz/fuzz_targets/displaysink_used_correctly.rs @@ -0,0 +1,24 @@ +#![no_main] +#[macro_use] extern crate libfuzzer_sys; +extern crate yaxpeax_x86; +extern crate yaxpeax_arch; + +fuzz_target!(|data: &[u8]| { + let x86_64_decoder = yaxpeax_x86::long_mode::InstDecoder::default(); + let x86_32_decoder = yaxpeax_x86::protected_mode::InstDecoder::default(); + let x86_16_decoder = yaxpeax_x86::real_mode::InstDecoder::default(); + + use yaxpeax_arch::testkit::DisplaySinkValidator; + + if 
let Ok(inst) = x86_64_decoder.decode_slice(data) { + inst.display_into(&mut DisplaySinkValidator::new()).expect("instruction can be displayed"); + }; + + if let Ok(inst) = x86_32_decoder.decode_slice(data) { + inst.display_into(&mut DisplaySinkValidator::new()).expect("instruction can be displayed"); + }; + + if let Ok(inst) = x86_16_decoder.decode_slice(data) { + inst.display_into(&mut DisplaySinkValidator::new()).expect("instruction can be displayed"); + }; +}); diff --git a/fuzz/fuzz_targets/small_reg_is_always_old_bank_if_possible.rs b/fuzz/fuzz_targets/small_reg_is_always_old_bank_if_possible.rs index a143205..b00ecb4 100644 --- a/fuzz/fuzz_targets/small_reg_is_always_old_bank_if_possible.rs +++ b/fuzz/fuzz_targets/small_reg_is_always_old_bank_if_possible.rs @@ -12,8 +12,8 @@ extern crate yaxpeax_x86; // cases. leaving them in for fuzz targets to match other cases, and In Case Of Future Change. fuzz_target!(|data: &[u8]| { let x86_64_decoder = yaxpeax_x86::long_mode::InstDecoder::default(); - let x86_32_decoder = yaxpeax_x86::protected_mode::InstDecoder::default(); - let x86_16_decoder = yaxpeax_x86::real_mode::InstDecoder::default(); + // let x86_32_decoder = yaxpeax_x86::protected_mode::InstDecoder::default(); + // let x86_16_decoder = yaxpeax_x86::real_mode::InstDecoder::default(); if let Ok(inst) = x86_64_decoder.decode_slice(data) { for i in 0..inst.operand_count() { From 0e3536379e25cf458eaf758fae9e0cb773cbcae6 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 23:13:31 -0700 Subject: [PATCH 86/95] note yaxpeax-arch version bump in changelog --- CHANGELOG | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 590f731..3a0193e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ ## 1.3.0 +* upgrade to `yaxpeax-arch 0.3.1`, which brings with it a deprecation of the + `Colorize` and `ShowContextual` traits. 
* fix 32-bit call/jmp not respecting 66 prefix if set - such cases use 16-bit operands, but decoded as if they used 32-bit operands. From b8a294db5ae6831c54be368e41fa8418a6f73bcb Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Jun 2024 23:43:06 -0700 Subject: [PATCH 87/95] remove selects_cs(), cs() now does the right thing --- src/long_mode/mod.rs | 5 +---- src/protected_mode/mod.rs | 5 +---- src/real_mode/mod.rs | 5 +---- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 5ed35e3..0aaa35f 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -4858,11 +4858,8 @@ impl Prefixes { fn set_lock(&mut self) { self.bits |= 0x4 } #[inline] pub fn lock(&self) -> bool { self.bits & 0x4 == 4 } - #[deprecated(since = "0.0.1", note = "pub fn cs has never returned `bool` indicating the current selector is `cs`. use `selects_cs` for this purpose, until 2.x that will correct `pub fn cs`.")] #[inline] - pub fn cs(&mut self) {} - #[inline] - pub fn selects_cs(&self) -> bool { self.segment == Segment::CS } + pub fn cs(&self) -> bool { self.segment == Segment::CS } #[inline] pub fn ds(&self) -> bool { self.segment == Segment::DS } #[inline] diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 956b5e3..154a746 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -4790,11 +4790,8 @@ impl Prefixes { fn set_lock(&mut self) { self.bits |= 0x4 } #[inline] pub fn lock(&self) -> bool { self.bits & 0x4 == 4 } - #[deprecated(since = "0.0.1", note = "pub fn cs has never returned `bool` indicating the current selector is `cs`. 
use `selects_cs` for this purpose, until 2.x that will correct `pub fn cs`.")] #[inline] - pub fn cs(&mut self) {} - #[inline] - pub fn selects_cs(&self) -> bool { self.segment == Segment::CS } + pub fn cs(&self) -> bool { self.segment == Segment::CS } #[inline] fn set_cs(&mut self) { self.segment = Segment::CS } #[inline] diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index 1ea89cd..b29cd1a 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -4790,11 +4790,8 @@ impl Prefixes { fn set_lock(&mut self) { self.bits |= 0x4 } #[inline] pub fn lock(&self) -> bool { self.bits & 0x4 == 4 } - #[deprecated(since = "0.0.1", note = "pub fn cs has never returned `bool` indicating the current selector is `cs`. use `selects_cs` for this purpose, until 2.x that will correct `pub fn cs`.")] #[inline] - pub fn cs(&mut self) {} - #[inline] - pub fn selects_cs(&self) -> bool { self.segment == Segment::CS } + pub fn cs(&self) -> bool { self.segment == Segment::CS } #[inline] fn set_cs(&mut self) { self.segment = Segment::CS } #[inline] From 1b8019d5b39a05c109399b8628a1082bfec79755 Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 24 Jun 2024 12:48:45 -0700 Subject: [PATCH 88/95] rename most operand variants, make them structy rather than tupley --- CHANGELOG | 39 ++- src/lib.rs | 4 +- src/long_mode/display.rs | 76 ++--- src/long_mode/mod.rs | 493 ++++++++++++++++----------------- src/protected_mode/display.rs | 76 ++--- src/protected_mode/mod.rs | 483 +++++++++++++++----------------- src/real_mode/display.rs | 76 ++--- src/real_mode/mod.rs | 487 ++++++++++++++++---------------- test/long_mode/operand.rs | 16 +- test/protected_mode/operand.rs | 12 +- 10 files changed, 868 insertions(+), 894 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 3a0193e..26046bb 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,44 @@ -## 1.3.0 +## 2.0.0 * upgrade to `yaxpeax-arch 0.3.1`, which brings with it a deprecation of the `Colorize` and `ShowContextual` traits. 
+* because common use of yaxpeax-x86 involves using both this crate and + `yaxpeax-arch`, moving to a newer major version of `yaxpeax-arch` is a major + version bump of `yaxpeax-x86` as well. so, 2.0.0! + +changes: + +* `Operand` variants have had their naming made more consistent. + - many variants starting with "Reg" actually describe a memory access. they + now begin with "Mem" instead. + - several variants mentioned "Scale" in their name, but not "Index", even + though they use an index register. they now do. + - several variants mentioned their constituent parts out of order. for + example, "RegIndexBaseScaleDisp", even though the parts were specified as + base, then index, then scale, then displacement. these names have been + adjusted to reflect the order of their fields, which is roughly the order + those fields are shown when printed. + - `DisplacementU*` operands have always been access to memory at the absolute + address they specify. their names are now `AbsoluteU*` +* `Operand`, across the board, now uses struct-style enum variants, rather than tuple-style. +* the two changes together mean an operand that was + `RegIndexBaseScaleDisp(reg, reg, u8, i32)` + is now + `MemBaseIndexScaleDisp { base, index, scale, disp }` + and similar for other variants. +* two operand kinds, and their masked variants, were never actually constructed, and have been deleted. + - long ago yaxpeax-x86 returned different Operand variants when an index + register was used with scale 1, to hint that no scaling actually occurred. + this was eventually changed to return a scaling Operand variant with + scale==1, but the old variants remained. + - RegIndexBase has been removed + - RegIndexBaseDisp has been removed +* `Prefixes::selects_cs()` has been moved to `Prefixes::cs()`, and the old + useless functions are no more. `inst.prefixes().cs()` is finally a reasonable + way to determine if an instruction reads or writes through the cs prefix. 
+ +fixes: + * fix 32-bit call/jmp not respecting 66 prefix if set - such cases use 16-bit operands, but decoded as if they used 32-bit operands. diff --git a/src/lib.rs b/src/lib.rs index 836a50c..7ab6cb8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,10 +35,10 @@ //! #[cfg(features="fmt")] //! assert_eq!("xor eax, dword [rcx]", inst.to_string()); //! -//! assert_eq!(Operand::Register(RegSpec::eax()), inst.operand(0)); +//! assert_eq!(Operand::Register { reg: RegSpec::eax() }, inst.operand(0)); //! #[cfg(features="fmt")] //! assert_eq!("eax", inst.operand(0).to_string()); -//! assert_eq!(Operand::RegDeref(RegSpec::rcx()), inst.operand(1)); +//! assert_eq!(Operand::MemDeref { base: RegSpec::rcx() }, inst.operand(1)); //! //! // an operand in isolation does not know the size of memory it references, if any //! #[cfg(features="fmt")] diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 6cf2def..469bb90 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -430,9 +430,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(not(feature="profiling"), inline(always))] #[cfg_attr(feature="profiling", inline(never))] - fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { + fn visit_disp(&mut self, base: RegSpec, disp: i32) -> Result { self.f.write_char('[')?; - self.f.write_reg(reg)?; + self.f.write_reg(base)?; self.f.write_fixed_size(" ")?; { @@ -447,23 +447,23 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } self.f.write_fixed_size("]") } - fn visit_deref(&mut self, reg: RegSpec) -> Result { + fn visit_deref(&mut self, base: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(reg)?; + self.f.write_reg(base)?; self.f.write_fixed_size("]") } - fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { + fn visit_index_scale(&mut self, index: RegSpec, scale: u8) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(reg)?; + self.f.write_reg(index)?; 
self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]")?; Ok(()) } - fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { + fn visit_index_scale_disp(&mut self, index: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(reg)?; + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size(" ")?; @@ -480,7 +480,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } self.f.write_char(']') } - fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { + fn visit_base_index_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -489,7 +489,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]") } - fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { + fn visit_base_index_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -510,9 +510,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } self.f.write_fixed_size("]") } - fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + fn visit_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_char('[')?; - self.f.write_reg(spec)?; + self.f.write_reg(base)?; self.f.write_char(' ')?; let mut v = disp as u32; if disp < 0 { @@ -528,18 +528,18 @@ impl super::OperandVisitor for 
DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { + fn visit_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(spec)?; + self.f.write_reg(base)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; self.f.write_char('}')?; Ok(()) } - fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + fn visit_index_scale_masked(&mut self, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(spec)?; + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]")?; @@ -548,9 +548,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + fn visit_index_scale_disp_masked(&mut self, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(spec)?; + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size(" ")?; @@ -568,7 +568,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { + fn visit_base_index_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -579,7 +579,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn 
visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + fn visit_base_index_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -599,7 +599,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + fn visit_base_index_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -612,7 +612,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + fn visit_base_index_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -3860,7 +3860,7 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) let mut out = yaxpeax_arch::display::FmtSink::new(out); use core::fmt::Write; match op { - Operand::ImmediateI8(rel) => { + Operand::ImmediateI8 { imm: rel } => { let rel = if rel >= 0 { out.write_str("$+")?; rel as u8 @@ -3870,7 +3870,7 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) }; out.write_prefixed_u8(rel) } - Operand::ImmediateI32(rel) => { + Operand::ImmediateI32 { imm: rel } => { let rel = if rel >= 0 { out.write_str("$+")?; rel as u32 @@ -4272,10 +4272,10 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_reg(&mut self, _reg: RegSpec) -> Result { Ok(false) } - fn visit_deref(&mut self, _reg: 
RegSpec) -> Result { + fn visit_deref(&mut self, _base: RegSpec) -> Result { Ok(false) } - fn visit_disp(&mut self, _reg: RegSpec, _disp: i32) -> Result { + fn visit_disp(&mut self, _base: RegSpec, _disp: i32) -> Result { Ok(false) } #[cfg_attr(feature="profiling", inline(never))] @@ -4338,16 +4338,16 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_abs_u64(&mut self, _imm: u64) -> Result { Ok(false) } - fn visit_reg_scale(&mut self, _reg: RegSpec, _scale: u8) -> Result { + fn visit_index_scale(&mut self, _index: RegSpec, _scale: u8) -> Result { Ok(false) } - fn visit_index_base_scale(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8) -> Result { + fn visit_base_index_scale(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8) -> Result { Ok(false) } - fn visit_reg_scale_disp(&mut self, _reg: RegSpec, _scale: u8, _disp: i32) -> Result { + fn visit_index_scale_disp(&mut self, _index: RegSpec, _scale: u8, _disp: i32) -> Result { Ok(false) } - fn visit_index_base_scale_disp(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32) -> Result { + fn visit_base_index_scale_disp(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32) -> Result { Ok(false) } fn visit_other(&mut self) -> Result { @@ -4362,28 +4362,28 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_reg_mask_merge_sae_noround(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode) -> Result { Ok(false) } - fn visit_reg_disp_masked(&mut self, _spec: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { + fn visit_disp_masked(&mut self, _base: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_reg_deref_masked(&mut self, _spec: RegSpec, _mask_reg: RegSpec) -> Result { + fn visit_deref_masked(&mut self, _base: RegSpec, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_reg_scale_masked(&mut self, _spec: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { + fn 
visit_index_scale_masked(&mut self, _index: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_reg_scale_disp_masked(&mut self, _spec: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { + fn visit_index_scale_disp_masked(&mut self, _index: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_index_base_masked(&mut self, _base: RegSpec, _index: RegSpec, _mask_reg: RegSpec) -> Result { + fn visit_base_index_masked(&mut self, _base: RegSpec, _index: RegSpec, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_index_base_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { + fn visit_base_index_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_index_base_scale_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { + fn visit_base_index_scale_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_index_base_scale_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { + fn visit_base_index_scale_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { Ok(false) } } diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 0aaa35f..6dabd6d 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -368,91 +368,81 @@ enum SizeCode { #[non_exhaustive] pub enum Operand { /// a sign-extended byte - ImmediateI8(i8), + ImmediateI8 { imm: i8 }, /// a zero-extended byte - ImmediateU8(u8), + ImmediateU8 { imm: u8 }, /// a sign-extended word - ImmediateI16(i16), + ImmediateI16 { imm: i16 }, /// a zero-extended word - ImmediateU16(u16), + ImmediateU16 { imm: u16 }, /// a sign-extended dword - ImmediateI32(i32), + ImmediateI32 { imm: i32 }, /// a zero-extended dword - 
ImmediateU32(u32), + ImmediateU32 { imm: u32 }, /// a sign-extended qword - ImmediateI64(i64), + ImmediateI64 { imm: i64 }, /// a zero-extended qword - ImmediateU64(u64), + ImmediateU64 { imm: u64 }, /// a bare register operand, such as `rcx`. - Register(RegSpec), + Register { reg: RegSpec }, /// an `avx512` register operand with optional mask register and merge mode, such as /// `zmm3{k4}{z}`. /// /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is /// `MergeMode::Merge`. - RegisterMaskMerge(RegSpec, RegSpec, MergeMode), + RegisterMaskMerge { reg: RegSpec, mask: RegSpec, merge: MergeMode }, /// an `avx512` register operand with optional mask register, merge mode, and suppressed /// exceptions, such as `zmm3{k4}{z}{rd-sae}`. /// /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is /// `MergeMode::Merge`. - RegisterMaskMergeSae(RegSpec, RegSpec, MergeMode, SaeMode), + RegisterMaskMergeSae { reg: RegSpec, mask: RegSpec, merge: MergeMode, sae: SaeMode }, /// an `avx512` register operand with optional mask register, merge mode, and suppressed /// exceptions, with no overridden rounding mode, such as `zmm3{k4}{z}{sae}`. /// /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is /// `MergeMode::Merge`. - RegisterMaskMergeSaeNoround(RegSpec, RegSpec, MergeMode), + RegisterMaskMergeSaeNoround { reg: RegSpec, mask: RegSpec, merge: MergeMode }, /// a memory access to a literal dword address. it's extremely rare that a well-formed x86 /// instruction uses this mode. as an example, `[0x1133]` - DisplacementU32(u32), + AbsoluteU32 { addr: u32 }, /// a memory access to a literal qword address. it's relatively rare that a well-formed x86 /// instruction uses this mode, but plausible. for example, `gs:[0x14]`. segment overrides, /// however, are maintained on the instruction itself. 
- DisplacementU64(u64), + AbsoluteU64 { addr: u64 }, /// a simple dereference of the address held in some register. for example: `[rsi]`. - RegDeref(RegSpec), + MemDeref { base: RegSpec }, /// a dereference of the address held in some register with offset. for example: `[rsi + 0x14]`. - RegDisp(RegSpec, i32), + Disp { base: RegSpec, disp: i32 }, /// a dereference of the address held in some register scaled by 1, 2, 4, or 8. this is almost always used with the `lea` instruction. for example: `[rdx * 4]`. - RegScale(RegSpec, u8), - /// a dereference of the address from summing two registers. for example: `[rbp + rax]` - RegIndexBase(RegSpec, RegSpec), - /// a dereference of the address from summing two registers with offset. for example: `[rdi + rcx + 0x40]` - RegIndexBaseDisp(RegSpec, RegSpec, i32), + MemIndexScale { index: RegSpec, scale: u8 }, /// a dereference of the address held in some register scaled by 1, 2, 4, or 8 with offset. this is almost always used with the `lea` instruction. for example: `[rax * 4 + 0x30]`. - RegScaleDisp(RegSpec, u8, i32), + MemIndexScaleDisp { index: RegSpec, scale: u8, disp: i32 }, /// a dereference of the address from summing a register and index register scaled by 1, 2, 4, /// or 8. for /// example: `[rsi + rcx * 4]` - RegIndexBaseScale(RegSpec, RegSpec, u8), + MemBaseIndexScale { base: RegSpec, index: RegSpec, scale: u8 }, /// a dereference of the address from summing a register and index register scaled by 1, 2, 4, /// or 8, with offset. for /// example: `[rsi + rcx * 4 + 0x1234]` - RegIndexBaseScaleDisp(RegSpec, RegSpec, u8, i32), + MemBaseIndexScaleDisp { base: RegSpec, index: RegSpec, scale: u8, disp: i32 }, /// an `avx512` dereference of register with optional masking. for example: `[rdx]{k3}` - RegDerefMasked(RegSpec, RegSpec), + MemDerefMasked { base: RegSpec, mask: RegSpec }, /// an `avx512` dereference of register plus offset, with optional masking. 
for example: `[rsp + 0x40]{k3}` - RegDispMasked(RegSpec, i32, RegSpec), + DispMasked { base: RegSpec, disp: i32, mask: RegSpec }, /// an `avx512` dereference of a register scaled by 1, 2, 4, or 8, with optional masking. this /// seems extraordinarily unlikely to occur in practice. for example: `[rsi * 4]{k2}` - RegScaleMasked(RegSpec, u8, RegSpec), - /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8, with optional masking. - /// for example: `[rsi + rax * 4]{k6}` - RegIndexBaseMasked(RegSpec, RegSpec, RegSpec), - /// an `avx512` dereference of a register plus offset, with optional masking. for example: - /// `[rsi + rax + 0x1313]{k6}` - RegIndexBaseDispMasked(RegSpec, RegSpec, i32, RegSpec), + MemIndexScaleMasked { index: RegSpec, scale: u8, mask: RegSpec }, /// an `avx512` dereference of a register scaled by 1, 2, 4, or 8 plus offset, with optional /// masking. this seems extraordinarily unlikely to occur in practice. for example: `[rsi * /// 4 + 0x1357]{k2}` - RegScaleDispMasked(RegSpec, u8, i32, RegSpec), + MemIndexScaleDispMasked { index: RegSpec, scale: u8, disp: i32, mask: RegSpec }, /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8, with optional /// masking. for example: `[rsi + rax * 4]{k6}` - RegIndexBaseScaleMasked(RegSpec, RegSpec, u8, RegSpec), + MemBaseIndexScaleMasked { base: RegSpec, index: RegSpec, scale: u8, mask: RegSpec }, /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8 and offset, with /// optional masking. for example: `[rsi + rax * 4 + 0x1313]{k6}` - RegIndexBaseScaleDispMasked(RegSpec, RegSpec, u8, i32, RegSpec), + MemBaseIndexScaleDispMasked { base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask: RegSpec }, /// no operand. it is a bug for `yaxpeax-x86` to construct an `Operand` of this kind for public /// use; the instruction's `operand_count` should be reduced so as to make this invisible to /// library clients. 
@@ -466,11 +456,11 @@ impl OperandSpec { OperandSpec::RegMMM => OperandSpec::RegMMM_maskmerge, OperandSpec::RegVex => OperandSpec::RegVex_maskmerge, OperandSpec::Deref => OperandSpec::Deref_mask, - OperandSpec::RegDisp => OperandSpec::RegDisp_mask, - OperandSpec::RegScale => OperandSpec::RegScale_mask, - OperandSpec::RegScaleDisp => OperandSpec::RegScaleDisp_mask, - OperandSpec::RegIndexBaseScale => OperandSpec::RegIndexBaseScale_mask, - OperandSpec::RegIndexBaseScaleDisp => OperandSpec::RegIndexBaseScaleDisp_mask, + OperandSpec::Disp => OperandSpec::Disp_mask, + OperandSpec::MemIndexScale => OperandSpec::MemIndexScale_mask, + OperandSpec::MemIndexScaleDisp => OperandSpec::MemIndexScaleDisp_mask, + OperandSpec::MemBaseIndexScale => OperandSpec::MemBaseIndexScale_mask, + OperandSpec::MemBaseIndexScaleDisp => OperandSpec::MemBaseIndexScaleDisp_mask, o => o, } } @@ -551,10 +541,10 @@ pub trait OperandVisitor { fn visit_reg(&mut self, reg: RegSpec) -> Result; fn visit_deref(&mut self, reg: RegSpec) -> Result; fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result; - fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result; - fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result; - fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result; - fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result; + fn visit_index_scale(&mut self, reg: RegSpec, scale: u8) -> Result; + fn visit_index_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result; + fn visit_base_index_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result; + fn visit_base_index_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result; fn visit_i8(&mut self, imm: i8) -> Result; fn visit_u8(&mut self, imm: u8) -> Result; fn visit_i16(&mut self, imm: i16) -> Result; @@ -568,14 +558,14 @@ pub trait OperandVisitor { fn visit_reg_mask_merge(&mut 
self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result; fn visit_reg_mask_merge_sae(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: SaeMode) -> Result; fn visit_reg_mask_merge_sae_noround(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result; - fn visit_reg_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; - fn visit_reg_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result; - fn visit_reg_scale_masked(&mut self, base: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; - fn visit_reg_scale_disp_masked(&mut self, base: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; - fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result; - fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; - fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; - fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result; + fn visit_index_scale_masked(&mut self, base: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; + fn visit_index_scale_disp_masked(&mut self, base: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_base_index_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result; + fn visit_base_index_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_base_index_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; + fn visit_base_index_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> 
Result; fn visit_other(&mut self) -> Result; } @@ -588,140 +578,140 @@ impl Operand { } // the register in modrm_rrr OperandSpec::RegRRR => { - Operand::Register(inst.regs[0]) + Operand::Register { reg: inst.regs[0] } } OperandSpec::RegRRR_maskmerge => { - Operand::RegisterMaskMerge( - inst.regs[0], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMerge { + reg: inst.regs[0], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } OperandSpec::RegRRR_maskmerge_sae => { - Operand::RegisterMaskMergeSae( - inst.regs[0], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - SaeMode::from(inst.prefixes.evex_unchecked().vex().l(), inst.prefixes.evex_unchecked().lp()), - ) + Operand::RegisterMaskMergeSae { + reg: inst.regs[0], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + sae: SaeMode::from(inst.prefixes.evex_unchecked().vex().l(), inst.prefixes.evex_unchecked().lp()), + } } OperandSpec::RegRRR_maskmerge_sae_noround => { - Operand::RegisterMaskMergeSaeNoround( - inst.regs[0], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMergeSaeNoround { + reg: inst.regs[0], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } // the register in modrm_mmm (eg modrm mod bits were 11) OperandSpec::RegMMM => { - Operand::Register(inst.regs[1]) + Operand::Register { reg: inst.regs[1] } } OperandSpec::RegMMM_maskmerge => { - Operand::RegisterMaskMerge( - inst.regs[1], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - 
MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMerge { + reg: inst.regs[1], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } OperandSpec::RegMMM_maskmerge_sae_noround => { - Operand::RegisterMaskMergeSaeNoround( - inst.regs[1], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMergeSaeNoround { + reg: inst.regs[1], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } OperandSpec::RegVex => { - Operand::Register(inst.regs[3]) + Operand::Register { reg: inst.regs[3]} } OperandSpec::RegVex_maskmerge => { - Operand::RegisterMaskMerge( - inst.regs[3], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMerge { + reg: inst.regs[3], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } OperandSpec::Reg4 => { - Operand::Register(RegSpec { num: inst.imm as u8, bank: inst.regs[3].bank }) - } - OperandSpec::ImmI8 => Operand::ImmediateI8(inst.imm as i8), - OperandSpec::ImmU8 => Operand::ImmediateU8(inst.imm as u8), - OperandSpec::ImmI16 => Operand::ImmediateI16(inst.imm as i16), - OperandSpec::ImmU16 => Operand::ImmediateU16(inst.imm as u16), - OperandSpec::ImmI32 => Operand::ImmediateI32(inst.imm as i32), - OperandSpec::ImmI64 => Operand::ImmediateI64(inst.imm as i64), - OperandSpec::ImmInDispField => Operand::ImmediateU16(inst.disp as u16), - OperandSpec::DispU32 => Operand::DisplacementU32(inst.disp as u32), - OperandSpec::DispU64 => Operand::DisplacementU64(inst.disp as u64), + Operand::Register { reg: RegSpec { num: inst.imm as u8, bank: inst.regs[3].bank }} + } + OperandSpec::ImmI8 => 
Operand::ImmediateI8 { imm: inst.imm as i8 }, + OperandSpec::ImmU8 => Operand::ImmediateU8 { imm: inst.imm as u8 }, + OperandSpec::ImmI16 => Operand::ImmediateI16 { imm: inst.imm as i16 }, + OperandSpec::ImmU16 => Operand::ImmediateU16 { imm: inst.imm as u16 }, + OperandSpec::ImmI32 => Operand::ImmediateI32 { imm: inst.imm as i32 }, + OperandSpec::ImmI64 => Operand::ImmediateI64 { imm: inst.imm as i64 }, + OperandSpec::ImmInDispField => Operand::ImmediateU16 { imm: inst.disp as u16 }, + OperandSpec::DispU32 => Operand::AbsoluteU32 { addr: inst.disp as u32 }, + OperandSpec::DispU64 => Operand::AbsoluteU64 { addr: inst.disp as u64 }, OperandSpec::Deref => { - Operand::RegDeref(inst.regs[1]) + Operand::MemDeref { base: inst.regs[1] } } OperandSpec::Deref_esi => { - Operand::RegDeref(RegSpec::esi()) + Operand::MemDeref { base: RegSpec::esi() } } OperandSpec::Deref_edi => { - Operand::RegDeref(RegSpec::edi()) + Operand::MemDeref { base: RegSpec::edi() } } OperandSpec::Deref_rsi => { - Operand::RegDeref(RegSpec::rsi()) + Operand::MemDeref { base: RegSpec::rsi() } } OperandSpec::Deref_rdi => { - Operand::RegDeref(RegSpec::rdi()) + Operand::MemDeref { base: RegSpec::rdi() } } - OperandSpec::RegDisp => { - Operand::RegDisp(inst.regs[1], inst.disp as i32) + OperandSpec::Disp => { + Operand::Disp { base: inst.regs[1], disp: inst.disp as i32 } } - OperandSpec::RegScale => { - Operand::RegScale(inst.regs[2], inst.scale) + OperandSpec::MemIndexScale => { + Operand::MemIndexScale { index: inst.regs[2], scale: inst.scale } } - OperandSpec::RegScaleDisp => { - Operand::RegScaleDisp(inst.regs[2], inst.scale, inst.disp as i32) + OperandSpec::MemIndexScaleDisp => { + Operand::MemIndexScaleDisp { index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32 } } - OperandSpec::RegIndexBaseScale => { - Operand::RegIndexBaseScale(inst.regs[1], inst.regs[2], inst.scale) + OperandSpec::MemBaseIndexScale => { + Operand::MemBaseIndexScale { base: inst.regs[1], index: inst.regs[2], scale: 
inst.scale } } - OperandSpec::RegIndexBaseScaleDisp => { - Operand::RegIndexBaseScaleDisp(inst.regs[1], inst.regs[2], inst.scale, inst.disp as i32) + OperandSpec::MemBaseIndexScaleDisp => { + Operand::MemBaseIndexScaleDisp { base: inst.regs[1], index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32 } } OperandSpec::Deref_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegDerefMasked(inst.regs[1], RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemDerefMasked { base: inst.regs[1], mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegDeref(inst.regs[1]) + Operand::MemDeref { base: inst.regs[1] } } } - OperandSpec::RegDisp_mask => { + OperandSpec::Disp_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegDispMasked(inst.regs[1], inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::DispMasked { base: inst.regs[1], disp: inst.disp as i32, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegDisp(inst.regs[1], inst.disp as i32) + Operand::Disp { base: inst.regs[1], disp: inst.disp as i32 } } } - OperandSpec::RegScale_mask => { + OperandSpec::MemIndexScale_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegScaleMasked(inst.regs[2], inst.scale, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemIndexScaleMasked { index: inst.regs[2], scale: inst.scale, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegScale(inst.regs[2], inst.scale) + Operand::MemIndexScale { index: inst.regs[2], scale: inst.scale } } } - OperandSpec::RegScaleDisp_mask => { + OperandSpec::MemIndexScaleDisp_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegScaleDispMasked(inst.regs[2], inst.scale, inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemIndexScaleDispMasked { index: 
inst.regs[2], scale: inst.scale, disp: inst.disp as i32, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegScaleDisp(inst.regs[2], inst.scale, inst.disp as i32) + Operand::MemIndexScaleDisp { index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32 } } } - OperandSpec::RegIndexBaseScale_mask => { + OperandSpec::MemBaseIndexScale_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegIndexBaseScaleMasked(inst.regs[1], inst.regs[2], inst.scale, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemBaseIndexScaleMasked { base: inst.regs[1], index: inst.regs[2], scale: inst.scale, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegIndexBaseScale(inst.regs[1], inst.regs[2], inst.scale) + Operand::MemBaseIndexScale { base: inst.regs[1], index: inst.regs[2], scale: inst.scale } } } - OperandSpec::RegIndexBaseScaleDisp_mask => { + OperandSpec::MemBaseIndexScaleDisp_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegIndexBaseScaleDispMasked(inst.regs[1], inst.regs[2], inst.scale, inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemBaseIndexScaleDispMasked { base: inst.regs[1], index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegIndexBaseScaleDisp(inst.regs[1], inst.regs[2], inst.scale, inst.disp as i32) + Operand::MemBaseIndexScaleDisp { base: inst.regs[1], index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32 } } } } @@ -733,38 +723,34 @@ impl Operand { /// memory. 
pub fn is_memory(&self) -> bool { match self { - Operand::DisplacementU32(_) | - Operand::DisplacementU64(_) | - Operand::RegDeref(_) | - Operand::RegDisp(_, _) | - Operand::RegScale(_, _) | - Operand::RegIndexBase(_, _) | - Operand::RegIndexBaseDisp(_, _, _) | - Operand::RegScaleDisp(_, _, _) | - Operand::RegIndexBaseScale(_, _, _) | - Operand::RegIndexBaseScaleDisp(_, _, _, _) | - Operand::RegDerefMasked(_, _) | - Operand::RegDispMasked(_, _, _) | - Operand::RegScaleMasked(_, _, _) | - Operand::RegIndexBaseMasked(_, _, _) | - Operand::RegIndexBaseDispMasked(_, _, _, _) | - Operand::RegScaleDispMasked(_, _, _, _) | - Operand::RegIndexBaseScaleMasked(_, _, _, _) | - Operand::RegIndexBaseScaleDispMasked(_, _, _, _, _) => { + Operand::AbsoluteU32 { .. } | + Operand::AbsoluteU64 { .. } | + Operand::MemDeref { .. } | + Operand::Disp { .. } | + Operand::MemIndexScale { .. } | + Operand::MemIndexScaleDisp { .. } | + Operand::MemBaseIndexScale { .. } | + Operand::MemBaseIndexScaleDisp { .. } | + Operand::MemDerefMasked { .. } | + Operand::DispMasked { .. } | + Operand::MemIndexScaleMasked { .. } | + Operand::MemIndexScaleDispMasked { .. } | + Operand::MemBaseIndexScaleMasked { .. } | + Operand::MemBaseIndexScaleDispMasked { .. } => { true }, - Operand::ImmediateI8(_) | - Operand::ImmediateU8(_) | - Operand::ImmediateI16(_) | - Operand::ImmediateU16(_) | - Operand::ImmediateU32(_) | - Operand::ImmediateI32(_) | - Operand::ImmediateU64(_) | - Operand::ImmediateI64(_) | - Operand::Register(_) | - Operand::RegisterMaskMerge(_, _, _) | - Operand::RegisterMaskMergeSae(_, _, _, _) | - Operand::RegisterMaskMergeSaeNoround(_, _, _) | + Operand::ImmediateI8 { .. } | + Operand::ImmediateU8 { .. } | + Operand::ImmediateI16 { .. } | + Operand::ImmediateU16 { .. } | + Operand::ImmediateU32 { .. } | + Operand::ImmediateI32 { .. } | + Operand::ImmediateU64 { .. } | + Operand::ImmediateI64 { .. } | + Operand::Register { .. } | + Operand::RegisterMaskMerge { .. 
} | + Operand::RegisterMaskMergeSae { .. } | + Operand::RegisterMaskMergeSaeNoround { .. } | Operand::Nothing => { false } @@ -776,26 +762,26 @@ impl Operand { /// `Operand` came from; `None` here means the authoritative width is `instr.mem_size()`. pub fn width(&self) -> Option { match self { - Operand::Register(reg) => { + Operand::Register { reg } => { Some(reg.width()) } - Operand::RegisterMaskMerge(reg, _, _) => { + Operand::RegisterMaskMerge { reg, .. } => { Some(reg.width()) } - Operand::ImmediateI8(_) | - Operand::ImmediateU8(_) => { + Operand::ImmediateI8 { .. } | + Operand::ImmediateU8 { .. } => { Some(1) } - Operand::ImmediateI16(_) | - Operand::ImmediateU16(_) => { + Operand::ImmediateI16 { .. } | + Operand::ImmediateU16 { .. } => { Some(2) } - Operand::ImmediateI32(_) | - Operand::ImmediateU32(_) => { + Operand::ImmediateI32 { .. } | + Operand::ImmediateU32 { .. } => { Some(4) } - Operand::ImmediateI64(_) | - Operand::ImmediateU64(_) => { + Operand::ImmediateI64 { .. } | + Operand::ImmediateU64 { .. 
} => { Some(8) } // memory operands or `Nothing` @@ -813,42 +799,38 @@ impl Operand { Operand::Nothing => { visitor.visit_other() } - Operand::Register(reg) => { + Operand::Register { reg } => { visitor.visit_reg(*reg) } - Operand::RegDeref(reg) => { - visitor.visit_deref(*reg) - } - Operand::RegDisp(reg, disp) => { - visitor.visit_disp(*reg, *disp) - } - Operand::ImmediateI8(imm) => visitor.visit_i8(*imm), - Operand::ImmediateU8(imm) => visitor.visit_u8(*imm), - Operand::ImmediateI16(imm) => visitor.visit_i16(*imm), - Operand::ImmediateU16(imm) => visitor.visit_u16(*imm), - Operand::ImmediateI32(imm) => visitor.visit_i32(*imm), - Operand::ImmediateU32(imm) => visitor.visit_u32(*imm), - Operand::ImmediateI64(imm) => visitor.visit_i64(*imm), - Operand::ImmediateU64(imm) => visitor.visit_u64(*imm), - Operand::DisplacementU32(disp) => visitor.visit_abs_u32(*disp), - Operand::DisplacementU64(disp) => visitor.visit_abs_u64(*disp), - Operand::RegScale(reg, scale) => visitor.visit_reg_scale(*reg, *scale), - Operand::RegScaleDisp(reg, scale, disp) => visitor.visit_reg_scale_disp(*reg, *scale, *disp), - Operand::RegIndexBase(_, _) => { /* not actually reachable anymore */ visitor.visit_other() }, - Operand::RegIndexBaseDisp(_, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, - Operand::RegIndexBaseScale(base, index, scale) => visitor.visit_index_base_scale(*base, *index, *scale), - Operand::RegIndexBaseScaleDisp(base, index, scale, disp) => visitor.visit_index_base_scale_disp(*base, *index, *scale, *disp), - Operand::RegisterMaskMerge(reg, mask, merge) => visitor.visit_reg_mask_merge(*reg, *mask, *merge), - Operand::RegisterMaskMergeSae(reg, mask, merge, sae) => visitor.visit_reg_mask_merge_sae(*reg, *mask, *merge, *sae), - Operand::RegisterMaskMergeSaeNoround(reg, mask, merge) => visitor.visit_reg_mask_merge_sae_noround(*reg, *mask, *merge), - Operand::RegDerefMasked(reg, mask) => visitor.visit_reg_deref_masked(*reg, *mask), - 
Operand::RegDispMasked(reg, disp, mask) => visitor.visit_reg_disp_masked(*reg, *disp, *mask), - Operand::RegScaleMasked(reg, scale, mask) => visitor.visit_reg_scale_masked(*reg, *scale, *mask), - Operand::RegIndexBaseMasked(_, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, - Operand::RegIndexBaseDispMasked(_, _, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, - Operand::RegScaleDispMasked(base, scale, disp, mask) => visitor.visit_reg_scale_disp_masked(*base, *scale, *disp, *mask), - Operand::RegIndexBaseScaleMasked(base, index, scale, mask) => visitor.visit_index_base_scale_masked(*base, *index, *scale, *mask), - Operand::RegIndexBaseScaleDispMasked(base, index, scale, disp, mask) => visitor.visit_index_base_scale_disp_masked(*base, *index, *scale, *disp, *mask), + Operand::MemDeref { base } => { + visitor.visit_deref(*base) + } + Operand::Disp { base, disp } => { + visitor.visit_disp(*base, *disp) + } + Operand::ImmediateI8 { imm } => visitor.visit_i8(*imm), + Operand::ImmediateU8 { imm } => visitor.visit_u8(*imm), + Operand::ImmediateI16 { imm } => visitor.visit_i16(*imm), + Operand::ImmediateU16 { imm } => visitor.visit_u16(*imm), + Operand::ImmediateI32 { imm } => visitor.visit_i32(*imm), + Operand::ImmediateU32 { imm } => visitor.visit_u32(*imm), + Operand::ImmediateI64 { imm } => visitor.visit_i64(*imm), + Operand::ImmediateU64 { imm } => visitor.visit_u64(*imm), + Operand::AbsoluteU32 { addr } => visitor.visit_abs_u32(*addr), + Operand::AbsoluteU64 { addr } => visitor.visit_abs_u64(*addr), + Operand::MemIndexScale { index, scale } => visitor.visit_index_scale(*index, *scale), + Operand::MemIndexScaleDisp { index, scale, disp } => visitor.visit_index_scale_disp(*index, *scale, *disp), + Operand::MemBaseIndexScale { base, index, scale } => visitor.visit_base_index_scale(*base, *index, *scale), + Operand::MemBaseIndexScaleDisp { base, index, scale, disp } => visitor.visit_base_index_scale_disp(*base, *index, 
*scale, *disp), + Operand::RegisterMaskMerge { reg, mask, merge } => visitor.visit_reg_mask_merge(*reg, *mask, *merge), + Operand::RegisterMaskMergeSae { reg, mask, merge, sae } => visitor.visit_reg_mask_merge_sae(*reg, *mask, *merge, *sae), + Operand::RegisterMaskMergeSaeNoround { reg, mask, merge } => visitor.visit_reg_mask_merge_sae_noround(*reg, *mask, *merge), + Operand::MemDerefMasked { base, mask } => visitor.visit_deref_masked(*base, *mask), + Operand::DispMasked { base, disp, mask } => visitor.visit_disp_masked(*base, *disp, *mask), + Operand::MemIndexScaleMasked { index, scale, mask } => visitor.visit_index_scale_masked(*index, *scale, *mask), + Operand::MemIndexScaleDispMasked { index, scale, disp, mask } => visitor.visit_index_scale_disp_masked(*index, *scale, *disp, *mask), + Operand::MemBaseIndexScaleMasked { base, index, scale, mask } => visitor.visit_base_index_scale_masked(*base, *index, *scale, *mask), + Operand::MemBaseIndexScaleDispMasked { base, index, scale, disp, mask } => visitor.visit_base_index_scale_disp_masked(*base, *index, *scale, *disp, *mask), } } } @@ -941,16 +923,16 @@ const REGISTER_CLASS_NAMES: &[&'static str] = &[ /// } /// } /// -/// if let Operand::Register(regspec) = instruction.operand(0) { +/// if let Operand::Register { reg } = instruction.operand(0) { /// #[cfg(feature="fmt")] -/// println!("first operand is {}", regspec); -/// show_register_class_info(regspec.class()); +/// println!("first operand is {}", reg); +/// show_register_class_info(reg.class()); /// } /// -/// if let Operand::Register(regspec) = instruction.operand(1) { +/// if let Operand::Register { reg } = instruction.operand(1) { /// #[cfg(feature="fmt")] -/// println!("first operand is {}", regspec); -/// show_register_class_info(regspec.class()); +/// println!("first operand is {}", reg); +/// show_register_class_info(reg.class()); /// } /// ``` /// @@ -2753,17 +2735,17 @@ enum OperandSpec { Deref_edi = 0x90, Deref_rsi = 0x91, Deref_rdi = 0x92, - RegDisp = 
0x93, - RegScale = 0x94, - RegScaleDisp = 0x95, - RegIndexBaseScale = 0x96, - RegIndexBaseScaleDisp = 0x97, + Disp = 0x93, + MemIndexScale = 0x94, + MemIndexScaleDisp = 0x95, + MemBaseIndexScale = 0x96, + MemBaseIndexScaleDisp = 0x97, Deref_mask = 0xce, - RegDisp_mask = 0xd3, - RegScale_mask = 0xd4, - RegScaleDisp_mask = 0xd5, - RegIndexBaseScale_mask = 0xd6, - RegIndexBaseScaleDisp_mask = 0xd7, + Disp_mask = 0xd3, + MemIndexScale_mask = 0xd4, + MemIndexScaleDisp_mask = 0xd5, + MemBaseIndexScale_mask = 0xd6, + MemBaseIndexScaleDisp_mask = 0xd7, } // the Hash, Eq, and PartialEq impls here are possibly misleading. @@ -4471,7 +4453,7 @@ impl Instruction { // visitor.visit_other() visitor.visit_deref(RegSpec::rdi()) } - OperandSpec::RegDisp => { + OperandSpec::Disp => { visitor.visit_disp(self.regs[1], self.disp as i32) } OperandSpec::RegRRR_maskmerge => { @@ -4526,61 +4508,58 @@ impl Instruction { OperandSpec::ImmInDispField => visitor.visit_u16(self.disp as u16), OperandSpec::DispU32 => visitor.visit_abs_u32(self.disp as u32), OperandSpec::DispU64 => visitor.visit_abs_u64(self.disp as u64), - OperandSpec::RegScale => { - visitor.visit_reg_scale(self.regs[2], self.scale) + OperandSpec::MemIndexScale => { + visitor.visit_index_scale(self.regs[2], self.scale) } - OperandSpec::RegScaleDisp => { - visitor.visit_reg_scale_disp(self.regs[2], self.scale, self.disp as i32) + OperandSpec::MemIndexScaleDisp => { + visitor.visit_index_scale_disp(self.regs[2], self.scale, self.disp as i32) } - OperandSpec::RegIndexBaseScale => { - visitor.visit_index_base_scale(self.regs[1], self.regs[2], self.scale) - /* - Operand::RegIndexBaseScale(self.regs[1], self.regs[2], self.scale) - */ + OperandSpec::MemBaseIndexScale => { + visitor.visit_base_index_scale(self.regs[1], self.regs[2], self.scale) } - OperandSpec::RegIndexBaseScaleDisp => { - visitor.visit_index_base_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) + OperandSpec::MemBaseIndexScaleDisp => { + 
visitor.visit_base_index_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) } OperandSpec::Deref_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_reg_deref_masked(self.regs[1], RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_deref_masked(self.regs[1], RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { visitor.visit_deref(self.regs[1]) } } - OperandSpec::RegDisp_mask => { + OperandSpec::Disp_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_reg_disp_masked(self.regs[1], self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_disp_masked(self.regs[1], self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { visitor.visit_disp(self.regs[1], self.disp as i32) } } - OperandSpec::RegScale_mask => { + OperandSpec::MemIndexScale_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_reg_scale_masked(self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_index_scale_masked(self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { - visitor.visit_reg_scale(self.regs[2], self.scale) + visitor.visit_index_scale(self.regs[2], self.scale) } } - OperandSpec::RegScaleDisp_mask => { + OperandSpec::MemIndexScaleDisp_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_reg_scale_disp_masked(self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_index_scale_disp_masked(self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { - visitor.visit_reg_scale_disp(self.regs[2], self.scale, self.disp as i32) + visitor.visit_index_scale_disp(self.regs[2], self.scale, self.disp as i32) } } - OperandSpec::RegIndexBaseScale_mask => { + OperandSpec::MemBaseIndexScale_mask => { if 
self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_index_base_scale_masked(self.regs[1], self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_base_index_scale_masked(self.regs[1], self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { - visitor.visit_index_base_scale(self.regs[1], self.regs[2], self.scale) + visitor.visit_base_index_scale(self.regs[1], self.regs[2], self.scale) } } - OperandSpec::RegIndexBaseScaleDisp_mask => { + OperandSpec::MemBaseIndexScaleDisp_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_index_base_scale_disp_masked(self.regs[1], self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_base_index_scale_disp_masked(self.regs[1], self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { - visitor.visit_index_base_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) + visitor.visit_base_index_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) } } } @@ -6253,7 +6232,7 @@ fn read_sib< InnerDescription::Misc("mod bits select no base register") .with_id(sib_start + 0) ); - OperandSpec::RegScale + OperandSpec::MemIndexScale } else { sink.record( modrm_start + 6, @@ -6261,7 +6240,7 @@ fn read_sib< InnerDescription::RegisterNumber("mod", 0b101, instr.regs[1]) .with_id(sib_start + 0) ); - OperandSpec::RegIndexBaseScale + OperandSpec::MemBaseIndexScale } } } else { @@ -6280,7 +6259,7 @@ fn read_sib< InnerDescription::RegisterNumber("iii", instr.regs[2].num & 0b111, instr.regs[2]) .with_id(sib_start + 0) ); - OperandSpec::RegIndexBaseScale + OperandSpec::MemBaseIndexScale } } @@ -6314,7 +6293,7 @@ fn read_sib< InnerDescription::RegisterNumber("mod", 0b101, instr.regs[1]) .with_id(sib_start + 0) ); - OperandSpec::RegDisp + OperandSpec::Disp } } else { sink.record( @@ -6330,7 +6309,7 
@@ fn read_sib< InnerDescription::Misc("mod bits select no base register, [index+disp] only") .with_id(sib_start + 0) ); - OperandSpec::RegScaleDisp + OperandSpec::MemIndexScaleDisp } else { sink.record( modrm_start + 6, @@ -6338,7 +6317,7 @@ fn read_sib< InnerDescription::RegisterNumber("mod", 0b101, instr.regs[1]) .with_id(sib_start + 0) ); - OperandSpec::RegIndexBaseScaleDisp + OperandSpec::MemBaseIndexScaleDisp } } } else { @@ -6355,7 +6334,7 @@ fn read_sib< InnerDescription::Misc("iii + rex.x selects no index register") .with_id(sib_start + 0) ); - OperandSpec::RegDisp + OperandSpec::Disp } else { sink.record( sib_start + 3, @@ -6363,7 +6342,7 @@ fn read_sib< InnerDescription::RegisterNumber("iii", instr.regs[2].num & 0b111, instr.regs[2]) .with_id(sib_start + 0) ); - OperandSpec::RegIndexBaseScaleDisp + OperandSpec::MemBaseIndexScaleDisp } } }; @@ -6449,7 +6428,7 @@ fn read_M< OperandSpec::Deref } else { instr.disp = disp as i64 as u64; - OperandSpec::RegDisp + OperandSpec::Disp } } else { sink.record( @@ -6491,7 +6470,7 @@ fn read_M< OperandSpec::Deref } else { instr.disp = disp as i64 as u64; - OperandSpec::RegDisp + OperandSpec::Disp } } }; diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index dc31dbf..1629b4e 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -406,9 +406,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(not(feature="profiling"), inline(always))] #[cfg_attr(feature="profiling", inline(never))] - fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { + fn visit_disp(&mut self, base: RegSpec, disp: i32) -> Result { self.f.write_char('[')?; - self.f.write_reg(reg)?; + self.f.write_reg(base)?; self.f.write_fixed_size(" ")?; { @@ -423,23 +423,23 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } self.f.write_fixed_size("]") } - fn visit_deref(&mut self, reg: RegSpec) -> Result { + fn visit_deref(&mut self, base: RegSpec) -> Result { 
self.f.write_fixed_size("[")?; - self.f.write_reg(reg)?; + self.f.write_reg(base)?; self.f.write_fixed_size("]") } - fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { + fn visit_index_scale(&mut self, index: RegSpec, scale: u8) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(reg)?; + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]")?; Ok(()) } - fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { + fn visit_index_scale_disp(&mut self, index: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(reg)?; + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size(" ")?; @@ -456,7 +456,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } self.f.write_char(']') } - fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { + fn visit_base_index_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -465,7 +465,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]") } - fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { + fn visit_base_index_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -486,9 +486,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } self.f.write_fixed_size("]") } - fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: 
i32, mask_reg: RegSpec) -> Result { + fn visit_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_char('[')?; - self.f.write_reg(spec)?; + self.f.write_reg(base)?; self.f.write_char(' ')?; let mut v = disp as u32; if disp < 0 { @@ -504,18 +504,18 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { + fn visit_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(spec)?; + self.f.write_reg(base)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; self.f.write_char('}')?; Ok(()) } - fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + fn visit_index_scale_masked(&mut self, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(spec)?; + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]")?; @@ -524,9 +524,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + fn visit_index_scale_disp_masked(&mut self, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(spec)?; + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size(" ")?; @@ -544,7 +544,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> 
Result { + fn visit_base_index_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -555,7 +555,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + fn visit_base_index_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -575,7 +575,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + fn visit_base_index_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -588,7 +588,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + fn visit_base_index_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -2420,7 +2420,7 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) let mut out = yaxpeax_arch::display::FmtSink::new(out); use core::fmt::Write; match op { - Operand::ImmediateI8(rel) => { + Operand::ImmediateI8 { imm: rel } => { let rel = if rel >= 0 { out.write_str("$+")?; rel as u8 @@ -2430,7 +2430,7 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) }; out.write_prefixed_u8(rel) } - 
Operand::ImmediateI32(rel) => { + Operand::ImmediateI32 { imm: rel } => { let rel = if rel >= 0 { out.write_str("$+")?; rel as u32 @@ -2824,10 +2824,10 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_reg(&mut self, _reg: RegSpec) -> Result { Ok(false) } - fn visit_deref(&mut self, _reg: RegSpec) -> Result { + fn visit_deref(&mut self, _base: RegSpec) -> Result { Ok(false) } - fn visit_disp(&mut self, _reg: RegSpec, _disp: i32) -> Result { + fn visit_disp(&mut self, _base: RegSpec, _disp: i32) -> Result { Ok(false) } #[cfg_attr(feature="profiling", inline(never))] @@ -2884,16 +2884,16 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_abs_u32(&mut self, _imm: u32) -> Result { Ok(false) } - fn visit_reg_scale(&mut self, _reg: RegSpec, _scale: u8) -> Result { + fn visit_index_scale(&mut self, _index: RegSpec, _scale: u8) -> Result { Ok(false) } - fn visit_index_base_scale(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8) -> Result { + fn visit_base_index_scale(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8) -> Result { Ok(false) } - fn visit_reg_scale_disp(&mut self, _reg: RegSpec, _scale: u8, _disp: i32) -> Result { + fn visit_index_scale_disp(&mut self, _index: RegSpec, _scale: u8, _disp: i32) -> Result { Ok(false) } - fn visit_index_base_scale_disp(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32) -> Result { + fn visit_base_index_scale_disp(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32) -> Result { Ok(false) } fn visit_other(&mut self) -> Result { @@ -2908,28 +2908,28 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_reg_mask_merge_sae_noround(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode) -> Result { Ok(false) } - fn visit_reg_disp_masked(&mut self, _spec: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { + fn visit_disp_masked(&mut self, _base: RegSpec, _disp: 
i32, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_reg_deref_masked(&mut self, _spec: RegSpec, _mask_reg: RegSpec) -> Result { + fn visit_deref_masked(&mut self, _base: RegSpec, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_reg_scale_masked(&mut self, _spec: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { + fn visit_index_scale_masked(&mut self, _index: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_reg_scale_disp_masked(&mut self, _spec: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { + fn visit_index_scale_disp_masked(&mut self, _index: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_index_base_masked(&mut self, _base: RegSpec, _index: RegSpec, _mask_reg: RegSpec) -> Result { + fn visit_base_index_masked(&mut self, _base: RegSpec, _index: RegSpec, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_index_base_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { + fn visit_base_index_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_index_base_scale_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { + fn visit_base_index_scale_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_index_base_scale_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { + fn visit_base_index_scale_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { Ok(false) } fn visit_absolute_far_address(&mut self, _segment: u16, _address: u32) -> Result { diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 154a746..081be20 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -306,87 +306,77 @@ enum SizeCode 
{ #[non_exhaustive] pub enum Operand { /// a sign-extended byte - ImmediateI8(i8), + ImmediateI8 { imm: i8 }, /// a zero-extended byte - ImmediateU8(u8), + ImmediateU8 { imm: u8 }, /// a sign-extended word - ImmediateI16(i16), + ImmediateI16 { imm: i16 }, /// a zero-extended word - ImmediateU16(u16), + ImmediateU16 { imm: u16 }, /// a sign-extended dword - ImmediateI32(i32), + ImmediateI32 { imm: i32 }, /// a zero-extended dword - ImmediateU32(u32), + ImmediateU32 { imm: u32 }, /// a bare register operand, such as `rcx`. - Register(RegSpec), + Register { reg: RegSpec }, /// an `avx512` register operand with optional mask register and merge mode, such as /// `zmm3{k4}{z}`. /// /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is /// `MergeMode::Merge`. - RegisterMaskMerge(RegSpec, RegSpec, MergeMode), + RegisterMaskMerge { reg: RegSpec, mask: RegSpec, merge: MergeMode }, /// an `avx512` register operand with optional mask register, merge mode, and suppressed /// exceptions, such as `zmm3{k4}{z}{rd-sae}`. /// /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is /// `MergeMode::Merge`. - RegisterMaskMergeSae(RegSpec, RegSpec, MergeMode, SaeMode), + RegisterMaskMergeSae { reg: RegSpec, mask: RegSpec, merge: MergeMode, sae: SaeMode }, /// an `avx512` register operand with optional mask register, merge mode, and suppressed /// exceptions, with no overridden rounding mode, such as `zmm3{k4}{z}{sae}`. /// /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is /// `MergeMode::Merge`. - RegisterMaskMergeSaeNoround(RegSpec, RegSpec, MergeMode), + RegisterMaskMergeSaeNoround { reg: RegSpec, mask: RegSpec, merge: MergeMode }, /// a memory access to a literal word address. it's extremely rare that a well-formed x86 /// instruction uses this mode. 
as an example, `[0x1133]` - DisplacementU16(u16), + AbsoluteU16 { addr: u16 }, /// a memory access to a literal qword address. it's relatively rare that a well-formed x86 /// instruction uses this mode, but plausible. for example, `fs:[0x14]`. segment overrides, /// however, are maintained on the instruction itself. - DisplacementU32(u32), + AbsoluteU32 { addr: u32 }, /// a simple dereference of the address held in some register. for example: `[esi]`. - RegDeref(RegSpec), + MemDeref { base: RegSpec }, /// a dereference of the address held in some register with offset. for example: `[esi + 0x14]`. - RegDisp(RegSpec, i32), + Disp { base: RegSpec, disp: i32 }, /// a dereference of the address held in some register scaled by 1, 2, 4, or 8. this is almost always used with the `lea` instruction. for example: `[edx * 4]`. - RegScale(RegSpec, u8), - /// a dereference of the address from summing two registers. for example: `[ebp + rax]` - RegIndexBase(RegSpec, RegSpec), - /// a dereference of the address from summing two registers with offset. for example: `[edi + ecx + 0x40]` - RegIndexBaseDisp(RegSpec, RegSpec, i32), + MemIndexScale { index: RegSpec, scale: u8 }, /// a dereference of the address held in some register scaled by 1, 2, 4, or 8 with offset. this is almost always used with the `lea` instruction. for example: `[eax * 4 + 0x30]`. - RegScaleDisp(RegSpec, u8, i32), + MemIndexScaleDisp { index: RegSpec, scale: u8, disp: i32 }, /// a dereference of the address from summing a register and index register scaled by 1, 2, 4, /// or 8. for /// example: `[esi + ecx * 4]` - RegIndexBaseScale(RegSpec, RegSpec, u8), + MemBaseIndexScale { base: RegSpec, index: RegSpec, scale: u8 }, /// a dereference of the address from summing a register and index register scaled by 1, 2, 4, /// or 8, with offset. 
for /// example: `[esi + ecx * 4 + 0x1234]` - RegIndexBaseScaleDisp(RegSpec, RegSpec, u8, i32), + MemBaseIndexScaleDisp { base: RegSpec, index: RegSpec, scale: u8, disp: i32 }, /// an `avx512` dereference of register with optional masking. for example: `[edx]{k3}` - RegDerefMasked(RegSpec, RegSpec), + MemDerefMasked { base: RegSpec, mask: RegSpec }, /// an `avx512` dereference of register plus offset, with optional masking. for example: `[esp + 0x40]{k3}` - RegDispMasked(RegSpec, i32, RegSpec), + DispMasked { base: RegSpec, disp: i32, mask: RegSpec }, /// an `avx512` dereference of a register scaled by 1, 2, 4, or 8, with optional masking. this /// seems extraordinarily unlikely to occur in practice. for example: `[esi * 4]{k2}` - RegScaleMasked(RegSpec, u8, RegSpec), - /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8, with optional masking. - /// for example: `[esi + eax * 4]{k6}` - RegIndexBaseMasked(RegSpec, RegSpec, RegSpec), - /// an `avx512` dereference of a register plus offset, with optional masking. for example: - /// `[esi + eax + 0x1313]{k6}` - RegIndexBaseDispMasked(RegSpec, RegSpec, i32, RegSpec), + MemIndexScaleMasked { index: RegSpec, scale: u8, mask: RegSpec }, /// an `avx512` dereference of a register scaled by 1, 2, 4, or 8 plus offset, with optional /// masking. this seems extraordinarily unlikely to occur in practice. for example: `[esi * /// 4 + 0x1357]{k2}` - RegScaleDispMasked(RegSpec, u8, i32, RegSpec), + MemIndexScaleDispMasked { index: RegSpec, scale: u8, disp: i32, mask: RegSpec }, /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8, with optional /// masking. for example: `[esi + eax * 4]{k6}` - RegIndexBaseScaleMasked(RegSpec, RegSpec, u8, RegSpec), + MemBaseIndexScaleMasked { base: RegSpec, index: RegSpec, scale: u8, mask: RegSpec }, /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8 and offset, with /// optional masking. 
for example: `[esi + eax * 4 + 0x1313]{k6}` - RegIndexBaseScaleDispMasked(RegSpec, RegSpec, u8, i32, RegSpec), + MemBaseIndexScaleDispMasked { base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask: RegSpec }, /// no operand. it is a bug for `yaxpeax-x86` to construct an `Operand` of this kind for public /// use; the instruction's `operand_count` should be reduced so as to make this invisible to /// library clients. @@ -403,11 +393,11 @@ impl OperandSpec { OperandSpec::RegMMM => OperandSpec::RegMMM_maskmerge, OperandSpec::RegVex => OperandSpec::RegVex_maskmerge, OperandSpec::Deref => OperandSpec::Deref_mask, - OperandSpec::RegDisp => OperandSpec::RegDisp_mask, - OperandSpec::RegScale => OperandSpec::RegScale_mask, - OperandSpec::RegScaleDisp => OperandSpec::RegScaleDisp_mask, - OperandSpec::RegIndexBaseScale => OperandSpec::RegIndexBaseScale_mask, - OperandSpec::RegIndexBaseScaleDisp => OperandSpec::RegIndexBaseScaleDisp_mask, + OperandSpec::Disp => OperandSpec::Disp_mask, + OperandSpec::MemIndexScale => OperandSpec::MemIndexScale_mask, + OperandSpec::MemIndexScaleDisp => OperandSpec::MemIndexScaleDisp_mask, + OperandSpec::MemBaseIndexScale => OperandSpec::MemBaseIndexScale_mask, + OperandSpec::MemBaseIndexScaleDisp => OperandSpec::MemBaseIndexScaleDisp_mask, o => o, } } @@ -487,10 +477,10 @@ pub trait OperandVisitor { fn visit_reg(&mut self, reg: RegSpec) -> Result; fn visit_deref(&mut self, reg: RegSpec) -> Result; fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result; - fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result; - fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result; - fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result; - fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result; + fn visit_index_scale(&mut self, index: RegSpec, scale: u8) -> Result; + fn visit_base_index_scale(&mut self, base: RegSpec, index: RegSpec, 
scale: u8) -> Result; + fn visit_base_index_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result; + fn visit_index_scale_disp(&mut self, index: RegSpec, scale: u8, disp: i32) -> Result; fn visit_i8(&mut self, imm: i8) -> Result; fn visit_u8(&mut self, imm: u8) -> Result; fn visit_i16(&mut self, imm: i16) -> Result; @@ -502,14 +492,14 @@ pub trait OperandVisitor { fn visit_reg_mask_merge(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result; fn visit_reg_mask_merge_sae(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: SaeMode) -> Result; fn visit_reg_mask_merge_sae_noround(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result; - fn visit_reg_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; - fn visit_reg_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result; - fn visit_reg_scale_masked(&mut self, base: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; - fn visit_reg_scale_disp_masked(&mut self, base: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; - fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result; - fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; - fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; - fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result; + fn visit_index_scale_masked(&mut self, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; + fn visit_index_scale_disp_masked(&mut self, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_base_index_masked(&mut 
self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result; + fn visit_base_index_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_base_index_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; + fn visit_base_index_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; fn visit_absolute_far_address(&mut self, segment: u16, address: u32) -> Result; fn visit_other(&mut self) -> Result; @@ -523,139 +513,139 @@ impl Operand { } // the register in regs[0] OperandSpec::RegRRR => { - Operand::Register(inst.regs[0]) + Operand::Register { reg: inst.regs[0] } } OperandSpec::RegRRR_maskmerge => { - Operand::RegisterMaskMerge( - inst.regs[0], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMerge { + reg: inst.regs[0], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } OperandSpec::RegRRR_maskmerge_sae => { - Operand::RegisterMaskMergeSae( - inst.regs[0], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - SaeMode::from(inst.prefixes.evex_unchecked().vex().l(), inst.prefixes.evex_unchecked().lp()), - ) + Operand::RegisterMaskMergeSae { + reg: inst.regs[0], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + sae: SaeMode::from(inst.prefixes.evex_unchecked().vex().l(), inst.prefixes.evex_unchecked().lp()), + } } OperandSpec::RegRRR_maskmerge_sae_noround => { - Operand::RegisterMaskMergeSaeNoround( - inst.regs[0], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMergeSaeNoround { + reg: 
inst.regs[0], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } // the register in regs[1] (eg modrm mod bits were 11) OperandSpec::RegMMM => { - Operand::Register(inst.regs[1]) + Operand::Register { reg: inst.regs[1] } } OperandSpec::RegMMM_maskmerge => { - Operand::RegisterMaskMerge( - inst.regs[1], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMerge { + reg: inst.regs[1], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } OperandSpec::RegMMM_maskmerge_sae_noround => { - Operand::RegisterMaskMergeSaeNoround( - inst.regs[1], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMergeSaeNoround { + reg: inst.regs[1], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } OperandSpec::RegVex => { - Operand::Register(inst.regs[3]) + Operand::Register { reg: inst.regs[3] } } OperandSpec::RegVex_maskmerge => { - Operand::RegisterMaskMerge( - inst.regs[3], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMerge { + reg: inst.regs[3], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } OperandSpec::Reg4 => { - Operand::Register(RegSpec { num: inst.imm as u8, bank: inst.regs[3].bank }) - } - OperandSpec::ImmI8 => Operand::ImmediateI8(inst.imm as i8), - OperandSpec::ImmU8 => Operand::ImmediateU8(inst.imm as u8), - OperandSpec::ImmI16 => Operand::ImmediateI16(inst.imm as i16), - OperandSpec::ImmU16 => Operand::ImmediateU16(inst.imm as u16), - 
OperandSpec::ImmI32 => Operand::ImmediateI32(inst.imm as i32), - OperandSpec::ImmInDispField => Operand::ImmediateU16(inst.disp as u16), - OperandSpec::DispU16 => Operand::DisplacementU16(inst.disp as u16), - OperandSpec::DispU32 => Operand::DisplacementU32(inst.disp), + Operand::Register { reg: RegSpec { num: inst.imm as u8, bank: inst.regs[3].bank }} + } + OperandSpec::ImmI8 => Operand::ImmediateI8 { imm: inst.imm as i8 }, + OperandSpec::ImmU8 => Operand::ImmediateU8 { imm: inst.imm as u8 }, + OperandSpec::ImmI16 => Operand::ImmediateI16 { imm: inst.imm as i16 }, + OperandSpec::ImmU16 => Operand::ImmediateU16 { imm: inst.imm as u16 }, + OperandSpec::ImmI32 => Operand::ImmediateI32 { imm: inst.imm as i32 }, + OperandSpec::ImmInDispField => Operand::ImmediateU16 { imm: inst.disp as u16 }, + OperandSpec::DispU16 => Operand::AbsoluteU16 { addr: inst.disp as u16 }, + OperandSpec::DispU32 => Operand::AbsoluteU32 { addr: inst.disp }, OperandSpec::Deref => { - Operand::RegDeref(inst.regs[1]) + Operand::MemDeref { base: inst.regs[1] } } OperandSpec::Deref_si => { - Operand::RegDeref(RegSpec::si()) + Operand::MemDeref { base: RegSpec::si() } } OperandSpec::Deref_di => { - Operand::RegDeref(RegSpec::di()) + Operand::MemDeref { base: RegSpec::di() } } OperandSpec::Deref_esi => { - Operand::RegDeref(RegSpec::esi()) + Operand::MemDeref { base: RegSpec::esi() } } OperandSpec::Deref_edi => { - Operand::RegDeref(RegSpec::edi()) + Operand::MemDeref { base: RegSpec::edi() } } - OperandSpec::RegDisp => { - Operand::RegDisp(inst.regs[1], inst.disp as i32) + OperandSpec::Disp => { + Operand::Disp { base: inst.regs[1], disp: inst.disp as i32 } } - OperandSpec::RegScale => { - Operand::RegScale(inst.regs[2], inst.scale) + OperandSpec::MemIndexScale => { + Operand::MemIndexScale { index: inst.regs[2], scale: inst.scale } } - OperandSpec::RegScaleDisp => { - Operand::RegScaleDisp(inst.regs[2], inst.scale, inst.disp as i32) + OperandSpec::MemIndexScaleDisp => { + Operand::MemIndexScaleDisp 
{ index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32 } } - OperandSpec::RegIndexBaseScale => { - Operand::RegIndexBaseScale(inst.regs[1], inst.regs[2], inst.scale) + OperandSpec::MemBaseIndexScale => { + Operand::MemBaseIndexScale { base: inst.regs[1], index: inst.regs[2], scale: inst.scale } } - OperandSpec::RegIndexBaseScaleDisp => { - Operand::RegIndexBaseScaleDisp(inst.regs[1], inst.regs[2], inst.scale, inst.disp as i32) + OperandSpec::MemBaseIndexScaleDisp => { + Operand::MemBaseIndexScaleDisp { base: inst.regs[1], index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32 } } OperandSpec::Deref_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegDerefMasked(inst.regs[1], RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemDerefMasked { base: inst.regs[1], mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegDeref(inst.regs[1]) + Operand::MemDeref { base: inst.regs[1] } } } - OperandSpec::RegDisp_mask => { + OperandSpec::Disp_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegDispMasked(inst.regs[1], inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::DispMasked { base: inst.regs[1], disp: inst.disp as i32, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegDisp(inst.regs[1], inst.disp as i32) + Operand::Disp { base: inst.regs[1], disp: inst.disp as i32 } } } - OperandSpec::RegScale_mask => { + OperandSpec::MemIndexScale_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegScaleMasked(inst.regs[2], inst.scale, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemIndexScaleMasked { index: inst.regs[2], scale: inst.scale, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegScale(inst.regs[2], inst.scale) + Operand::MemIndexScale { index: inst.regs[2], scale: inst.scale } } } - 
OperandSpec::RegScaleDisp_mask => { + OperandSpec::MemIndexScaleDisp_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegScaleDispMasked(inst.regs[2], inst.scale, inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemIndexScaleDispMasked { index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegScaleDisp(inst.regs[2], inst.scale, inst.disp as i32) + Operand::MemIndexScaleDisp { index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32 } } } - OperandSpec::RegIndexBaseScale_mask => { + OperandSpec::MemBaseIndexScale_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegIndexBaseScaleMasked(inst.regs[1], inst.regs[2], inst.scale, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemBaseIndexScaleMasked { base: inst.regs[1], index: inst.regs[2], scale: inst.scale, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegIndexBaseScale(inst.regs[1], inst.regs[2], inst.scale) + Operand::MemBaseIndexScale { base: inst.regs[1], index: inst.regs[2], scale: inst.scale } } } - OperandSpec::RegIndexBaseScaleDisp_mask => { + OperandSpec::MemBaseIndexScaleDisp_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegIndexBaseScaleDispMasked(inst.regs[1], inst.regs[2], inst.scale, inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemBaseIndexScaleDispMasked { base: inst.regs[1], index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegIndexBaseScaleDisp(inst.regs[1], inst.regs[2], inst.scale, inst.disp as i32) + Operand::MemBaseIndexScaleDisp { base: inst.regs[1], index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32 } } } OperandSpec::AbsoluteFarAddress => { @@ -672,36 +662,32 @@ impl Operand 
{ /// memory. pub fn is_memory(&self) -> bool { match self { - Operand::DisplacementU16(_) | - Operand::DisplacementU32(_) | - Operand::RegDeref(_) | - Operand::RegDisp(_, _) | - Operand::RegScale(_, _) | - Operand::RegIndexBase(_, _) | - Operand::RegIndexBaseDisp(_, _, _) | - Operand::RegScaleDisp(_, _, _) | - Operand::RegIndexBaseScale(_, _, _) | - Operand::RegIndexBaseScaleDisp(_, _, _, _) | - Operand::RegDerefMasked(_, _) | - Operand::RegDispMasked(_, _, _) | - Operand::RegScaleMasked(_, _, _) | - Operand::RegIndexBaseMasked(_, _, _) | - Operand::RegIndexBaseDispMasked(_, _, _, _) | - Operand::RegScaleDispMasked(_, _, _, _) | - Operand::RegIndexBaseScaleMasked(_, _, _, _) | - Operand::RegIndexBaseScaleDispMasked(_, _, _, _, _) => { + Operand::AbsoluteU16 { .. } | + Operand::AbsoluteU32 { .. } | + Operand::MemDeref { .. } | + Operand::Disp { .. } | + Operand::MemIndexScale { .. } | + Operand::MemIndexScaleDisp { .. } | + Operand::MemBaseIndexScale { .. } | + Operand::MemBaseIndexScaleDisp { .. } | + Operand::MemDerefMasked { .. } | + Operand::DispMasked { .. } | + Operand::MemIndexScaleMasked { .. } | + Operand::MemIndexScaleDispMasked { .. } | + Operand::MemBaseIndexScaleMasked { .. } | + Operand::MemBaseIndexScaleDispMasked { .. } => { true }, - Operand::ImmediateI8(_) | - Operand::ImmediateU8(_) | - Operand::ImmediateI16(_) | - Operand::ImmediateU16(_) | - Operand::ImmediateU32(_) | - Operand::ImmediateI32(_) | - Operand::Register(_) | - Operand::RegisterMaskMerge(_, _, _) | - Operand::RegisterMaskMergeSae(_, _, _, _) | - Operand::RegisterMaskMergeSaeNoround(_, _, _) | + Operand::ImmediateI8 { .. } | + Operand::ImmediateU8 { .. } | + Operand::ImmediateI16 { .. } | + Operand::ImmediateU16 { .. } | + Operand::ImmediateU32 { .. } | + Operand::ImmediateI32 { .. } | + Operand::Register { .. } | + Operand::RegisterMaskMerge { .. } | + Operand::RegisterMaskMergeSae { .. } | + Operand::RegisterMaskMergeSaeNoround { .. } | Operand::AbsoluteFarAddress { .. 
} | Operand::Nothing => { false @@ -714,22 +700,22 @@ impl Operand { /// `Operand` came from; `None` here means the authoritative width is `instr.mem_size()`. pub fn width(&self) -> Option { match self { - Operand::Register(reg) => { + Operand::Register { reg } => { Some(reg.width()) } - Operand::RegisterMaskMerge(reg, _, _) => { + Operand::RegisterMaskMerge { reg, .. } => { Some(reg.width()) } - Operand::ImmediateI8(_) | - Operand::ImmediateU8(_) => { + Operand::ImmediateI8 { .. } | + Operand::ImmediateU8 { .. } => { Some(1) } - Operand::ImmediateI16(_) | - Operand::ImmediateU16(_) => { + Operand::ImmediateI16 { .. } | + Operand::ImmediateU16 { .. } => { Some(2) } - Operand::ImmediateI32(_) | - Operand::ImmediateU32(_) => { + Operand::ImmediateI32 { .. } | + Operand::ImmediateU32 { .. } => { Some(4) } // memory operands or `Nothing` @@ -747,40 +733,36 @@ impl Operand { Operand::Nothing => { visitor.visit_other() } - Operand::Register(reg) => { + Operand::Register { reg } => { visitor.visit_reg(*reg) } - Operand::RegDeref(reg) => { - visitor.visit_deref(*reg) - } - Operand::RegDisp(reg, disp) => { - visitor.visit_disp(*reg, *disp) - } - Operand::ImmediateI8(imm) => visitor.visit_i8(*imm), - Operand::ImmediateU8(imm) => visitor.visit_u8(*imm), - Operand::ImmediateI16(imm) => visitor.visit_i16(*imm), - Operand::ImmediateU16(imm) => visitor.visit_u16(*imm), - Operand::ImmediateI32(imm) => visitor.visit_i32(*imm), - Operand::ImmediateU32(imm) => visitor.visit_u32(*imm), - Operand::DisplacementU16(disp) => visitor.visit_abs_u16(*disp), - Operand::DisplacementU32(disp) => visitor.visit_abs_u32(*disp), - Operand::RegScale(reg, scale) => visitor.visit_reg_scale(*reg, *scale), - Operand::RegScaleDisp(reg, scale, disp) => visitor.visit_reg_scale_disp(*reg, *scale, *disp), - Operand::RegIndexBase(_, _) => { /* not actually reachable anymore */ visitor.visit_other() }, - Operand::RegIndexBaseDisp(_, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, - 
Operand::RegIndexBaseScale(base, index, scale) => visitor.visit_index_base_scale(*base, *index, *scale), - Operand::RegIndexBaseScaleDisp(base, index, scale, disp) => visitor.visit_index_base_scale_disp(*base, *index, *scale, *disp), - Operand::RegisterMaskMerge(reg, mask, merge) => visitor.visit_reg_mask_merge(*reg, *mask, *merge), - Operand::RegisterMaskMergeSae(reg, mask, merge, sae) => visitor.visit_reg_mask_merge_sae(*reg, *mask, *merge, *sae), - Operand::RegisterMaskMergeSaeNoround(reg, mask, merge) => visitor.visit_reg_mask_merge_sae_noround(*reg, *mask, *merge), - Operand::RegDerefMasked(reg, mask) => visitor.visit_reg_deref_masked(*reg, *mask), - Operand::RegDispMasked(reg, disp, mask) => visitor.visit_reg_disp_masked(*reg, *disp, *mask), - Operand::RegScaleMasked(reg, scale, mask) => visitor.visit_reg_scale_masked(*reg, *scale, *mask), - Operand::RegIndexBaseMasked(_, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, - Operand::RegIndexBaseDispMasked(_, _, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, - Operand::RegScaleDispMasked(base, scale, disp, mask) => visitor.visit_reg_scale_disp_masked(*base, *scale, *disp, *mask), - Operand::RegIndexBaseScaleMasked(base, index, scale, mask) => visitor.visit_index_base_scale_masked(*base, *index, *scale, *mask), - Operand::RegIndexBaseScaleDispMasked(base, index, scale, disp, mask) => visitor.visit_index_base_scale_disp_masked(*base, *index, *scale, *disp, *mask), + Operand::MemDeref { base } => { + visitor.visit_deref(*base) + } + Operand::Disp { base, disp } => { + visitor.visit_disp(*base, *disp) + } + Operand::ImmediateI8 { imm } => visitor.visit_i8(*imm), + Operand::ImmediateU8 { imm } => visitor.visit_u8(*imm), + Operand::ImmediateI16 { imm } => visitor.visit_i16(*imm), + Operand::ImmediateU16 { imm } => visitor.visit_u16(*imm), + Operand::ImmediateI32 { imm } => visitor.visit_i32(*imm), + Operand::ImmediateU32 { imm } => visitor.visit_u32(*imm), + 
Operand::AbsoluteU16 { addr } => visitor.visit_abs_u16(*addr), + Operand::AbsoluteU32 { addr } => visitor.visit_abs_u32(*addr), + Operand::MemIndexScale { index, scale } => visitor.visit_index_scale(*index, *scale), + Operand::MemIndexScaleDisp { index, scale, disp } => visitor.visit_index_scale_disp(*index, *scale, *disp), + Operand::MemBaseIndexScale { base, index, scale } => visitor.visit_base_index_scale(*base, *index, *scale), + Operand::MemBaseIndexScaleDisp { base, index, scale, disp } => visitor.visit_base_index_scale_disp(*base, *index, *scale, *disp), + Operand::RegisterMaskMerge { reg, mask, merge } => visitor.visit_reg_mask_merge(*reg, *mask, *merge), + Operand::RegisterMaskMergeSae { reg, mask, merge, sae } => visitor.visit_reg_mask_merge_sae(*reg, *mask, *merge, *sae), + Operand::RegisterMaskMergeSaeNoround { reg, mask, merge } => visitor.visit_reg_mask_merge_sae_noround(*reg, *mask, *merge), + Operand::MemDerefMasked { base, mask } => visitor.visit_deref_masked(*base, *mask), + Operand::DispMasked { base, disp, mask } => visitor.visit_disp_masked(*base, *disp, *mask), + Operand::MemIndexScaleMasked { index, scale, mask } => visitor.visit_index_scale_masked(*index, *scale, *mask), + Operand::MemIndexScaleDispMasked { index, scale, disp, mask } => visitor.visit_index_scale_disp_masked(*index, *scale, *disp, *mask), + Operand::MemBaseIndexScaleMasked { base, index, scale, mask } => visitor.visit_base_index_scale_masked(*base, *index, *scale, *mask), + Operand::MemBaseIndexScaleDispMasked { base, index, scale, disp, mask } => visitor.visit_base_index_scale_disp_masked(*base, *index, *scale, *disp, *mask), Operand::AbsoluteFarAddress { segment, address } => visitor.visit_absolute_far_address(*segment, *address), } } @@ -865,16 +847,16 @@ const REGISTER_CLASS_NAMES: &[&'static str] = &[ /// } /// } /// -/// if let Operand::Register(regspec) = instruction.operand(0) { +/// if let Operand::Register { reg } = instruction.operand(0) { /// #[cfg(feature="fmt")] 
-/// println!("first operand is {}", regspec); -/// show_register_class_info(regspec.class()); +/// println!("first operand is {}", reg); +/// show_register_class_info(reg.class()); /// } /// -/// if let Operand::Register(regspec) = instruction.operand(1) { +/// if let Operand::Register { reg } = instruction.operand(1) { /// #[cfg(feature="fmt")] -/// println!("first operand is {}", regspec); -/// show_register_class_info(regspec.class()); +/// println!("second operand is {}", reg); +/// show_register_class_info(reg.class()); /// } /// ``` /// @@ -2667,17 +2649,17 @@ enum OperandSpec { Deref_di = 0x90, Deref_esi = 0x91, Deref_edi = 0x92, - RegDisp = 0x93, - RegScale = 0x94, - RegScaleDisp = 0x95, - RegIndexBaseScale = 0x96, - RegIndexBaseScaleDisp = 0x97, + Disp = 0x93, + MemIndexScale = 0x94, + MemIndexScaleDisp = 0x95, + MemBaseIndexScale = 0x96, + MemBaseIndexScaleDisp = 0x97, Deref_mask = 0xce, - RegDisp_mask = 0xd3, - RegScale_mask = 0xd4, - RegScaleDisp_mask = 0xd5, - RegIndexBaseScale_mask = 0xd6, - RegIndexBaseScaleDisp_mask = 0xd7, + Disp_mask = 0xd3, + MemIndexScale_mask = 0xd4, + MemIndexScaleDisp_mask = 0xd5, + MemBaseIndexScale_mask = 0xd6, + MemBaseIndexScaleDisp_mask = 0xd7, // u16:u{16,32} immediate address for a far call AbsoluteFarAddress = 0x18, } @@ -4385,7 +4367,7 @@ impl Instruction { // visitor.visit_other() visitor.visit_deref(RegSpec::edi()) } - OperandSpec::RegDisp => { + OperandSpec::Disp => { visitor.visit_disp(self.regs[1], self.disp as i32) } OperandSpec::RegRRR_maskmerge => { @@ -4439,61 +4421,58 @@ impl Instruction { OperandSpec::ImmInDispField => visitor.visit_u16(self.disp as u16), OperandSpec::DispU16 => visitor.visit_abs_u16(self.disp as u16), OperandSpec::DispU32 => visitor.visit_abs_u32(self.disp as u32), - OperandSpec::RegScale => { - visitor.visit_reg_scale(self.regs[2], self.scale) + OperandSpec::MemIndexScale => { + visitor.visit_index_scale(self.regs[2], self.scale) } - OperandSpec::RegScaleDisp => { - 
visitor.visit_reg_scale_disp(self.regs[2], self.scale, self.disp as i32) + OperandSpec::MemIndexScaleDisp => { + visitor.visit_index_scale_disp(self.regs[2], self.scale, self.disp as i32) } - OperandSpec::RegIndexBaseScale => { - visitor.visit_index_base_scale(self.regs[1], self.regs[2], self.scale) - /* - Operand::RegIndexBaseScale(self.regs[1], self.regs[2], self.scale) - */ + OperandSpec::MemBaseIndexScale => { + visitor.visit_base_index_scale(self.regs[1], self.regs[2], self.scale) } - OperandSpec::RegIndexBaseScaleDisp => { - visitor.visit_index_base_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) + OperandSpec::MemBaseIndexScaleDisp => { + visitor.visit_base_index_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) } OperandSpec::Deref_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_reg_deref_masked(self.regs[1], RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_deref_masked(self.regs[1], RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { visitor.visit_deref(self.regs[1]) } } - OperandSpec::RegDisp_mask => { + OperandSpec::Disp_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_reg_disp_masked(self.regs[1], self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_disp_masked(self.regs[1], self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { visitor.visit_disp(self.regs[1], self.disp as i32) } } - OperandSpec::RegScale_mask => { + OperandSpec::MemIndexScale_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_reg_scale_masked(self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_index_scale_masked(self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { - visitor.visit_reg_scale(self.regs[2], self.scale) + visitor.visit_index_scale(self.regs[2], self.scale) 
} } - OperandSpec::RegScaleDisp_mask => { + OperandSpec::MemIndexScaleDisp_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_reg_scale_disp_masked(self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_index_scale_disp_masked(self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { - visitor.visit_reg_scale_disp(self.regs[2], self.scale, self.disp as i32) + visitor.visit_index_scale_disp(self.regs[2], self.scale, self.disp as i32) } } - OperandSpec::RegIndexBaseScale_mask => { + OperandSpec::MemBaseIndexScale_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_index_base_scale_masked(self.regs[1], self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_base_index_scale_masked(self.regs[1], self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { - visitor.visit_index_base_scale(self.regs[1], self.regs[2], self.scale) + visitor.visit_base_index_scale(self.regs[1], self.regs[2], self.scale) } } - OperandSpec::RegIndexBaseScaleDisp_mask => { + OperandSpec::MemBaseIndexScaleDisp_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_index_base_scale_disp_masked(self.regs[1], self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_base_index_scale_disp_masked(self.regs[1], self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { - visitor.visit_index_base_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) + visitor.visit_base_index_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) } } OperandSpec::AbsoluteFarAddress => { @@ -6155,7 +6134,7 @@ fn read_sib< InnerDescription::Misc("mod bits select no base register") .with_id(sib_start + 0) ); - 
OperandSpec::RegScale + OperandSpec::MemIndexScale } else { sink.record( modrm_start + 6, @@ -6163,7 +6142,7 @@ fn read_sib< InnerDescription::RegisterNumber("mod", 0b101, instr.regs[1]) .with_id(sib_start + 0) ); - OperandSpec::RegIndexBaseScale + OperandSpec::MemBaseIndexScale } } } else { @@ -6182,7 +6161,7 @@ fn read_sib< InnerDescription::RegisterNumber("iii", instr.regs[2].num, instr.regs[2]) .with_id(sib_start + 0) ); - OperandSpec::RegIndexBaseScale + OperandSpec::MemBaseIndexScale } } @@ -6216,7 +6195,7 @@ fn read_sib< InnerDescription::RegisterNumber("mod", 0b101, instr.regs[1]) .with_id(sib_start + 0) ); - OperandSpec::RegDisp + OperandSpec::Disp } } else { sink.record( @@ -6232,7 +6211,7 @@ fn read_sib< InnerDescription::Misc("mod bits select no base register, [index+disp] only") .with_id(sib_start + 0) ); - OperandSpec::RegScaleDisp + OperandSpec::MemIndexScaleDisp } else { sink.record( modrm_start + 6, @@ -6240,7 +6219,7 @@ fn read_sib< InnerDescription::RegisterNumber("mod", 0b101, instr.regs[1]) .with_id(sib_start + 0) ); - OperandSpec::RegIndexBaseScaleDisp + OperandSpec::MemBaseIndexScaleDisp } } } else { @@ -6257,7 +6236,7 @@ fn read_sib< InnerDescription::Misc("iii selects no index register") .with_id(sib_start + 0) ); - OperandSpec::RegDisp + OperandSpec::Disp } else { sink.record( sib_start + 3, @@ -6265,7 +6244,7 @@ fn read_sib< InnerDescription::RegisterNumber("iii", instr.regs[2].num, instr.regs[2]) .with_id(sib_start + 0) ); - OperandSpec::RegIndexBaseScaleDisp + OperandSpec::MemBaseIndexScaleDisp } } }; @@ -6375,7 +6354,7 @@ fn read_M_16bit< Ok(OperandSpec::Deref) } else { instr.scale = 1; - Ok(OperandSpec::RegIndexBaseScale) + Ok(OperandSpec::MemBaseIndexScale) } }, 0b01 => { @@ -6396,13 +6375,13 @@ fn read_M_16bit< ); if mmm > 3 { if instr.disp != 0 { - Ok(OperandSpec::RegDisp) + Ok(OperandSpec::Disp) } else { Ok(OperandSpec::Deref) } } else { instr.scale = 1; - Ok(OperandSpec::RegIndexBaseScaleDisp) + 
Ok(OperandSpec::MemBaseIndexScaleDisp) } }, 0b10 => { @@ -6423,13 +6402,13 @@ fn read_M_16bit< ); if mmm > 3 { if instr.disp != 0 { - Ok(OperandSpec::RegDisp) + Ok(OperandSpec::Disp) } else { Ok(OperandSpec::Deref) } } else { instr.scale = 1; - Ok(OperandSpec::RegIndexBaseScaleDisp) + Ok(OperandSpec::MemBaseIndexScaleDisp) } }, _ => { @@ -6523,7 +6502,7 @@ fn read_M< OperandSpec::Deref } else { instr.disp = disp as i32 as u32; - OperandSpec::RegDisp + OperandSpec::Disp } } }; diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index 38f95bb..bea11fc 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -406,9 +406,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(not(feature="profiling"), inline(always))] #[cfg_attr(feature="profiling", inline(never))] - fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result { + fn visit_disp(&mut self, base: RegSpec, disp: i32) -> Result { self.f.write_char('[')?; - self.f.write_reg(reg)?; + self.f.write_reg(base)?; self.f.write_fixed_size(" ")?; { @@ -423,23 +423,23 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } self.f.write_fixed_size("]") } - fn visit_deref(&mut self, reg: RegSpec) -> Result { + fn visit_deref(&mut self, base: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(reg)?; + self.f.write_reg(base)?; self.f.write_fixed_size("]") } - fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result { + fn visit_index_scale(&mut self, index: RegSpec, scale: u8) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(reg)?; + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]")?; Ok(()) } - fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result { + fn visit_index_scale_disp(&mut self, index: RegSpec, scale: u8, disp: i32) -> Result { 
self.f.write_fixed_size("[")?; - self.f.write_reg(reg)?; + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size(" ")?; @@ -456,7 +456,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } self.f.write_char(']') } - fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { + fn visit_base_index_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -465,7 +465,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]") } - fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { + fn visit_base_index_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -486,9 +486,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } self.f.write_fixed_size("]") } - fn visit_reg_disp_masked(&mut self, spec: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + fn visit_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_char('[')?; - self.f.write_reg(spec)?; + self.f.write_reg(base)?; self.f.write_char(' ')?; let mut v = disp as u32; if disp < 0 { @@ -504,18 +504,18 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_reg_deref_masked(&mut self, spec: RegSpec, mask_reg: RegSpec) -> Result { + fn visit_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(spec)?; + self.f.write_reg(base)?; self.f.write_fixed_size("]")?; 
self.f.write_char('{')?; self.f.write_reg(mask_reg)?; self.f.write_char('}')?; Ok(()) } - fn visit_reg_scale_masked(&mut self, spec: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + fn visit_index_scale_masked(&mut self, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(spec)?; + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size("]")?; @@ -524,9 +524,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_reg_scale_disp_masked(&mut self, spec: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + fn visit_index_scale_disp_masked(&mut self, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(spec)?; + self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc self.f.write_fixed_size(" ")?; @@ -544,7 +544,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { + fn visit_base_index_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -555,7 +555,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { + fn visit_base_index_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -575,7 +575,7 @@ impl 
super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { + fn visit_base_index_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -588,7 +588,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('}')?; Ok(()) } - fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { + fn visit_base_index_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; self.f.write_reg(base)?; self.f.write_fixed_size(" + ")?; @@ -2420,7 +2420,7 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) let mut out = yaxpeax_arch::display::FmtSink::new(out); use core::fmt::Write; match op { - Operand::ImmediateI8(rel) => { + Operand::ImmediateI8 { imm: rel } => { let rel = if rel >= 0 { out.write_str("$+")?; rel as u8 @@ -2430,7 +2430,7 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) }; out.write_prefixed_u8(rel) } - Operand::ImmediateI32(rel) => { + Operand::ImmediateI32 { imm: rel } => { let rel = if rel >= 0 { out.write_str("$+")?; rel as u32 @@ -2824,10 +2824,10 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_reg(&mut self, _reg: RegSpec) -> Result { Ok(false) } - fn visit_deref(&mut self, _reg: RegSpec) -> Result { + fn visit_deref(&mut self, _base: RegSpec) -> Result { Ok(false) } - fn visit_disp(&mut self, _reg: RegSpec, _disp: i32) -> Result { + fn visit_disp(&mut self, _base: RegSpec, _disp: i32) -> Result { Ok(false) } #[cfg_attr(feature="profiling", inline(never))] @@ -2884,16 +2884,16 @@ impl<'a, F: DisplaySink> 
super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_abs_u32(&mut self, _imm: u32) -> Result { Ok(false) } - fn visit_reg_scale(&mut self, _reg: RegSpec, _scale: u8) -> Result { + fn visit_index_scale(&mut self, _index: RegSpec, _scale: u8) -> Result { Ok(false) } - fn visit_index_base_scale(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8) -> Result { + fn visit_base_index_scale(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8) -> Result { Ok(false) } - fn visit_reg_scale_disp(&mut self, _reg: RegSpec, _scale: u8, _disp: i32) -> Result { + fn visit_index_scale_disp(&mut self, _index: RegSpec, _scale: u8, _disp: i32) -> Result { Ok(false) } - fn visit_index_base_scale_disp(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32) -> Result { + fn visit_base_index_scale_disp(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32) -> Result { Ok(false) } fn visit_other(&mut self) -> Result { @@ -2908,28 +2908,28 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> fn visit_reg_mask_merge_sae_noround(&mut self, _spec: RegSpec, _mask: RegSpec, _merge_mode: MergeMode) -> Result { Ok(false) } - fn visit_reg_disp_masked(&mut self, _spec: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { + fn visit_disp_masked(&mut self, _base: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_reg_deref_masked(&mut self, _spec: RegSpec, _mask_reg: RegSpec) -> Result { + fn visit_deref_masked(&mut self, _base: RegSpec, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_reg_scale_masked(&mut self, _spec: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { + fn visit_index_scale_masked(&mut self, _index: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_reg_scale_disp_masked(&mut self, _spec: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { + fn visit_index_scale_disp_masked(&mut self, _index: RegSpec, _scale: u8, _disp: i32, _mask_reg: 
RegSpec) -> Result { Ok(false) } - fn visit_index_base_masked(&mut self, _base: RegSpec, _index: RegSpec, _mask_reg: RegSpec) -> Result { + fn visit_base_index_masked(&mut self, _base: RegSpec, _index: RegSpec, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_index_base_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { + fn visit_base_index_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _disp: i32, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_index_base_scale_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { + fn visit_base_index_scale_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _mask_reg: RegSpec) -> Result { Ok(false) } - fn visit_index_base_scale_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { + fn visit_base_index_scale_disp_masked(&mut self, _base: RegSpec, _index: RegSpec, _scale: u8, _disp: i32, _mask_reg: RegSpec) -> Result { Ok(false) } fn visit_absolute_far_address(&mut self, _segment: u16, _address: u32) -> Result { diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index b29cd1a..b06a525 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -306,87 +306,77 @@ enum SizeCode { #[non_exhaustive] pub enum Operand { /// a sign-extended byte - ImmediateI8(i8), + ImmediateI8 { imm: i8 }, /// a zero-extended byte - ImmediateU8(u8), + ImmediateU8 { imm: u8 }, /// a sign-extended word - ImmediateI16(i16), + ImmediateI16 { imm: i16 }, /// a zero-extended word - ImmediateU16(u16), + ImmediateU16 { imm: u16 }, /// a sign-extended dword - ImmediateI32(i32), + ImmediateI32 { imm: i32 }, /// a zero-extended dword - ImmediateU32(u32), + ImmediateU32 { imm: u32 }, /// a bare register operand, such as `rcx`. 
- Register(RegSpec), + Register { reg: RegSpec }, /// an `avx512` register operand with optional mask register and merge mode, such as /// `zmm3{k4}{z}`. /// /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is /// `MergeMode::Merge`. - RegisterMaskMerge(RegSpec, RegSpec, MergeMode), + RegisterMaskMerge { reg: RegSpec, mask: RegSpec, merge: MergeMode }, /// an `avx512` register operand with optional mask register, merge mode, and suppressed /// exceptions, such as `zmm3{k4}{z}{rd-sae}`. /// /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is /// `MergeMode::Merge`. - RegisterMaskMergeSae(RegSpec, RegSpec, MergeMode, SaeMode), + RegisterMaskMergeSae { reg: RegSpec, mask: RegSpec, merge: MergeMode, sae: SaeMode }, /// an `avx512` register operand with optional mask register, merge mode, and suppressed /// exceptions, with no overridden rounding mode, such as `zmm3{k4}{z}{sae}`. /// /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is /// `MergeMode::Merge`. - RegisterMaskMergeSaeNoround(RegSpec, RegSpec, MergeMode), + RegisterMaskMergeSaeNoround { reg: RegSpec, mask: RegSpec, merge: MergeMode }, /// a memory access to a literal word address. it's extremely rare that a well-formed x86 /// instruction uses this mode. as an example, `[0x1133]` - DisplacementU16(u16), + AbsoluteU16 { addr: u16 }, /// a memory access to a literal qword address. it's relatively rare that a well-formed x86 /// instruction uses this mode, but plausible. for example, `fs:[0x14]`. segment overrides, /// however, are maintained on the instruction itself. - DisplacementU32(u32), + AbsoluteU32 { addr: u32 }, /// a simple dereference of the address held in some register. for example: `[esi]`. - RegDeref(RegSpec), + MemDeref { base: RegSpec }, /// a dereference of the address held in some register with offset. for example: `[esi + 0x14]`. 
- RegDisp(RegSpec, i32), + MemDisp { base: RegSpec, disp: i32 }, /// a dereference of the address held in some register scaled by 1, 2, 4, or 8. this is almost always used with the `lea` instruction. for example: `[edx * 4]`. - RegScale(RegSpec, u8), - /// a dereference of the address from summing two registers. for example: `[ebp + rax]` - RegIndexBase(RegSpec, RegSpec), - /// a dereference of the address from summing two registers with offset. for example: `[edi + ecx + 0x40]` - RegIndexBaseDisp(RegSpec, RegSpec, i32), + MemIndexScale { index: RegSpec, scale: u8 }, /// a dereference of the address held in some register scaled by 1, 2, 4, or 8 with offset. this is almost always used with the `lea` instruction. for example: `[eax * 4 + 0x30]`. - RegScaleDisp(RegSpec, u8, i32), + MemIndexScaleDisp { index: RegSpec, scale: u8, disp: i32 }, /// a dereference of the address from summing a register and index register scaled by 1, 2, 4, /// or 8. for /// example: `[esi + ecx * 4]` - RegIndexBaseScale(RegSpec, RegSpec, u8), + MemBaseIndexScale { base: RegSpec, index: RegSpec, scale: u8 }, /// a dereference of the address from summing a register and index register scaled by 1, 2, 4, /// or 8, with offset. for /// example: `[esi + ecx * 4 + 0x1234]` - RegIndexBaseScaleDisp(RegSpec, RegSpec, u8, i32), + MemBaseIndexScaleDisp { base: RegSpec, index: RegSpec, scale: u8, disp: i32 }, /// an `avx512` dereference of register with optional masking. for example: `[edx]{k3}` - RegDerefMasked(RegSpec, RegSpec), + MemDerefMasked { base: RegSpec, mask: RegSpec }, /// an `avx512` dereference of register plus offset, with optional masking. for example: `[esp + 0x40]{k3}` - RegDispMasked(RegSpec, i32, RegSpec), + MemDispMasked { base: RegSpec, disp: i32, mask: RegSpec }, /// an `avx512` dereference of a register scaled by 1, 2, 4, or 8, with optional masking. this /// seems extraordinarily unlikely to occur in practice. 
for example: `[esi * 4]{k2}` - RegScaleMasked(RegSpec, u8, RegSpec), - /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8, with optional masking. - /// for example: `[esi + eax * 4]{k6}` - RegIndexBaseMasked(RegSpec, RegSpec, RegSpec), - /// an `avx512` dereference of a register plus offset, with optional masking. for example: - /// `[esi + eax + 0x1313]{k6}` - RegIndexBaseDispMasked(RegSpec, RegSpec, i32, RegSpec), + MemIndexScaleMasked { index: RegSpec, scale: u8, mask: RegSpec }, /// an `avx512` dereference of a register scaled by 1, 2, 4, or 8 plus offset, with optional /// masking. this seems extraordinarily unlikely to occur in practice. for example: `[esi * /// 4 + 0x1357]{k2}` - RegScaleDispMasked(RegSpec, u8, i32, RegSpec), + MemIndexScaleDispMasked { index: RegSpec, scale: u8, disp: i32, mask: RegSpec }, /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8, with optional /// masking. for example: `[esi + eax * 4]{k6}` - RegIndexBaseScaleMasked(RegSpec, RegSpec, u8, RegSpec), + MemBaseIndexScaleMasked { base: RegSpec, index: RegSpec, scale: u8, mask: RegSpec }, /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8 and offset, with /// optional masking. for example: `[esi + eax * 4 + 0x1313]{k6}` - RegIndexBaseScaleDispMasked(RegSpec, RegSpec, u8, i32, RegSpec), + MemBaseIndexScaleDispMasked { base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask: RegSpec }, /// no operand. it is a bug for `yaxpeax-x86` to construct an `Operand` of this kind for public /// use; the instruction's `operand_count` should be reduced so as to make this invisible to /// library clients. 
@@ -403,11 +393,11 @@ impl OperandSpec { OperandSpec::RegMMM => OperandSpec::RegMMM_maskmerge, OperandSpec::RegVex => OperandSpec::RegVex_maskmerge, OperandSpec::Deref => OperandSpec::Deref_mask, - OperandSpec::RegDisp => OperandSpec::RegDisp_mask, - OperandSpec::RegScale => OperandSpec::RegScale_mask, - OperandSpec::RegScaleDisp => OperandSpec::RegScaleDisp_mask, - OperandSpec::RegIndexBaseScale => OperandSpec::RegIndexBaseScale_mask, - OperandSpec::RegIndexBaseScaleDisp => OperandSpec::RegIndexBaseScaleDisp_mask, + OperandSpec::Disp => OperandSpec::Disp_mask, + OperandSpec::MemIndexScale => OperandSpec::MemIndexScale_mask, + OperandSpec::MemIndexScaleDisp => OperandSpec::MemIndexScaleDisp_mask, + OperandSpec::MemBaseIndexScale => OperandSpec::MemBaseIndexScale_mask, + OperandSpec::MemBaseIndexScaleDisp => OperandSpec::MemBaseIndexScaleDisp_mask, o => o, } } @@ -485,12 +475,12 @@ pub trait OperandVisitor { type Error; fn visit_reg(&mut self, reg: RegSpec) -> Result; - fn visit_deref(&mut self, reg: RegSpec) -> Result; - fn visit_disp(&mut self, reg: RegSpec, disp: i32) -> Result; - fn visit_reg_scale(&mut self, reg: RegSpec, scale: u8) -> Result; - fn visit_index_base_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result; - fn visit_index_base_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result; - fn visit_reg_scale_disp(&mut self, reg: RegSpec, scale: u8, disp: i32) -> Result; + fn visit_deref(&mut self, base: RegSpec) -> Result; + fn visit_disp(&mut self, base: RegSpec, disp: i32) -> Result; + fn visit_index_scale(&mut self, index: RegSpec, scale: u8) -> Result; + fn visit_base_index_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result; + fn visit_base_index_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result; + fn visit_index_scale_disp(&mut self, index: RegSpec, scale: u8, disp: i32) -> Result; fn visit_i8(&mut self, imm: i8) -> Result; fn visit_u8(&mut self, imm: 
u8) -> Result; fn visit_i16(&mut self, imm: i16) -> Result; @@ -502,14 +492,14 @@ pub trait OperandVisitor { fn visit_reg_mask_merge(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result; fn visit_reg_mask_merge_sae(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: SaeMode) -> Result; fn visit_reg_mask_merge_sae_noround(&mut self, base: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result; - fn visit_reg_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; - fn visit_reg_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result; - fn visit_reg_scale_masked(&mut self, base: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; - fn visit_reg_scale_disp_masked(&mut self, base: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; - fn visit_index_base_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result; - fn visit_index_base_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; - fn visit_index_base_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; - fn visit_index_base_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result; + fn visit_index_scale_masked(&mut self, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result; + fn visit_index_scale_disp_masked(&mut self, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_base_index_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result; + fn visit_base_index_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result; + fn visit_base_index_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: 
RegSpec) -> Result; + fn visit_base_index_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result; fn visit_absolute_far_address(&mut self, segment: u16, address: u32) -> Result; fn visit_other(&mut self) -> Result; @@ -523,139 +513,139 @@ impl Operand { } // the register in regs[0] OperandSpec::RegRRR => { - Operand::Register(inst.regs[0]) + Operand::Register { reg: inst.regs[0] } } OperandSpec::RegRRR_maskmerge => { - Operand::RegisterMaskMerge( - inst.regs[0], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMerge { + reg: inst.regs[0], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } OperandSpec::RegRRR_maskmerge_sae => { - Operand::RegisterMaskMergeSae( - inst.regs[0], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - SaeMode::from(inst.prefixes.evex_unchecked().vex().l(), inst.prefixes.evex_unchecked().lp()), - ) + Operand::RegisterMaskMergeSae { + reg: inst.regs[0], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + sae: SaeMode::from(inst.prefixes.evex_unchecked().vex().l(), inst.prefixes.evex_unchecked().lp()), + } } OperandSpec::RegRRR_maskmerge_sae_noround => { - Operand::RegisterMaskMergeSaeNoround( - inst.regs[0], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMergeSaeNoround { + reg: inst.regs[0], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } // the register in regs[1] (eg modrm mod bits were 11) OperandSpec::RegMMM => { - Operand::Register(inst.regs[1]) + Operand::Register { 
reg: inst.regs[1] } } OperandSpec::RegMMM_maskmerge => { - Operand::RegisterMaskMerge( - inst.regs[1], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMerge { + reg: inst.regs[1], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } OperandSpec::RegMMM_maskmerge_sae_noround => { - Operand::RegisterMaskMergeSaeNoround( - inst.regs[1], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMergeSaeNoround { + reg: inst.regs[1], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } OperandSpec::RegVex => { - Operand::Register(inst.regs[3]) + Operand::Register { reg: inst.regs[3] } } OperandSpec::RegVex_maskmerge => { - Operand::RegisterMaskMerge( - inst.regs[3], - RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), - MergeMode::from(inst.prefixes.evex_unchecked().merge()), - ) + Operand::RegisterMaskMerge { + reg: inst.regs[3], + mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), + merge: MergeMode::from(inst.prefixes.evex_unchecked().merge()), + } } OperandSpec::Reg4 => { - Operand::Register(RegSpec { num: inst.imm as u8, bank: inst.regs[3].bank }) - } - OperandSpec::ImmI8 => Operand::ImmediateI8(inst.imm as i8), - OperandSpec::ImmU8 => Operand::ImmediateU8(inst.imm as u8), - OperandSpec::ImmI16 => Operand::ImmediateI16(inst.imm as i16), - OperandSpec::ImmU16 => Operand::ImmediateU16(inst.imm as u16), - OperandSpec::ImmI32 => Operand::ImmediateI32(inst.imm as i32), - OperandSpec::ImmInDispField => Operand::ImmediateU16(inst.disp as u16), - OperandSpec::DispU16 => Operand::DisplacementU16(inst.disp as u16), - OperandSpec::DispU32 => Operand::DisplacementU32(inst.disp), + Operand::Register { reg: 
RegSpec { num: inst.imm as u8, bank: inst.regs[3].bank } } + } + OperandSpec::ImmI8 => Operand::ImmediateI8 { imm: inst.imm as i8 }, + OperandSpec::ImmU8 => Operand::ImmediateU8 { imm: inst.imm as u8 }, + OperandSpec::ImmI16 => Operand::ImmediateI16 { imm: inst.imm as i16 }, + OperandSpec::ImmU16 => Operand::ImmediateU16 { imm: inst.imm as u16 }, + OperandSpec::ImmI32 => Operand::ImmediateI32 { imm: inst.imm as i32 }, + OperandSpec::ImmInDispField => Operand::ImmediateU16 { imm: inst.disp as u16 }, + OperandSpec::DispU16 => Operand::AbsoluteU16 { addr: inst.disp as u16 }, + OperandSpec::DispU32 => Operand::AbsoluteU32 { addr: inst.disp }, OperandSpec::Deref => { - Operand::RegDeref(inst.regs[1]) + Operand::MemDeref { base: inst.regs[1] } } OperandSpec::Deref_si => { - Operand::RegDeref(RegSpec::si()) + Operand::MemDeref { base: RegSpec::si() } } OperandSpec::Deref_di => { - Operand::RegDeref(RegSpec::di()) + Operand::MemDeref { base: RegSpec::di() } } OperandSpec::Deref_esi => { - Operand::RegDeref(RegSpec::esi()) + Operand::MemDeref { base: RegSpec::esi() } } OperandSpec::Deref_edi => { - Operand::RegDeref(RegSpec::edi()) + Operand::MemDeref { base: RegSpec::edi() } } - OperandSpec::RegDisp => { - Operand::RegDisp(inst.regs[1], inst.disp as i32) + OperandSpec::Disp => { + Operand::MemDisp { base: inst.regs[1], disp: inst.disp as i32 } } - OperandSpec::RegScale => { - Operand::RegScale(inst.regs[2], inst.scale) + OperandSpec::MemIndexScale => { + Operand::MemIndexScale { index: inst.regs[2], scale: inst.scale } } - OperandSpec::RegScaleDisp => { - Operand::RegScaleDisp(inst.regs[2], inst.scale, inst.disp as i32) + OperandSpec::MemIndexScaleDisp => { + Operand::MemIndexScaleDisp { index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32 } } - OperandSpec::RegIndexBaseScale => { - Operand::RegIndexBaseScale(inst.regs[1], inst.regs[2], inst.scale) + OperandSpec::MemBaseIndexScale => { + Operand::MemBaseIndexScale { base: inst.regs[1], index: inst.regs[2], scale: 
inst.scale } } - OperandSpec::RegIndexBaseScaleDisp => { - Operand::RegIndexBaseScaleDisp(inst.regs[1], inst.regs[2], inst.scale, inst.disp as i32) + OperandSpec::MemBaseIndexScaleDisp => { + Operand::MemBaseIndexScaleDisp { base: inst.regs[1], index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32 } } OperandSpec::Deref_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegDerefMasked(inst.regs[1], RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemDerefMasked { base: inst.regs[1], mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegDeref(inst.regs[1]) + Operand::MemDeref { base: inst.regs[1] } } } - OperandSpec::RegDisp_mask => { + OperandSpec::Disp_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegDispMasked(inst.regs[1], inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemDispMasked { base: inst.regs[1], disp: inst.disp as i32, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegDisp(inst.regs[1], inst.disp as i32) + Operand::MemDisp { base: inst.regs[1], disp: inst.disp as i32 } } } - OperandSpec::RegScale_mask => { + OperandSpec::MemIndexScale_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegScaleMasked(inst.regs[2], inst.scale, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemIndexScaleMasked { index: inst.regs[2], scale: inst.scale, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegScale(inst.regs[2], inst.scale) + Operand::MemIndexScale { index: inst.regs[2], scale: inst.scale } } } - OperandSpec::RegScaleDisp_mask => { + OperandSpec::MemIndexScaleDisp_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegScaleDispMasked(inst.regs[2], inst.scale, inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemIndexScaleDispMasked { index: 
inst.regs[2], scale: inst.scale, disp: inst.disp as i32, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegScaleDisp(inst.regs[2], inst.scale, inst.disp as i32) + Operand::MemIndexScaleDisp { index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32 } } } - OperandSpec::RegIndexBaseScale_mask => { + OperandSpec::MemBaseIndexScale_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegIndexBaseScaleMasked(inst.regs[1], inst.regs[2], inst.scale, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemBaseIndexScaleMasked { base: inst.regs[1], index: inst.regs[2], scale: inst.scale, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegIndexBaseScale(inst.regs[1], inst.regs[2], inst.scale) + Operand::MemBaseIndexScale { base: inst.regs[1], index: inst.regs[2], scale: inst.scale } } } - OperandSpec::RegIndexBaseScaleDisp_mask => { + OperandSpec::MemBaseIndexScaleDisp_mask => { if inst.prefixes.evex_unchecked().mask_reg() != 0 { - Operand::RegIndexBaseScaleDispMasked(inst.regs[1], inst.regs[2], inst.scale, inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) + Operand::MemBaseIndexScaleDispMasked { base: inst.regs[1], index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32, mask: RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()) } } else { - Operand::RegIndexBaseScaleDisp(inst.regs[1], inst.regs[2], inst.scale, inst.disp as i32) + Operand::MemBaseIndexScaleDisp { base: inst.regs[1], index: inst.regs[2], scale: inst.scale, disp: inst.disp as i32 } } } OperandSpec::AbsoluteFarAddress => { @@ -672,36 +662,32 @@ impl Operand { /// memory. 
pub fn is_memory(&self) -> bool { match self { - Operand::DisplacementU16(_) | - Operand::DisplacementU32(_) | - Operand::RegDeref(_) | - Operand::RegDisp(_, _) | - Operand::RegScale(_, _) | - Operand::RegIndexBase(_, _) | - Operand::RegIndexBaseDisp(_, _, _) | - Operand::RegScaleDisp(_, _, _) | - Operand::RegIndexBaseScale(_, _, _) | - Operand::RegIndexBaseScaleDisp(_, _, _, _) | - Operand::RegDerefMasked(_, _) | - Operand::RegDispMasked(_, _, _) | - Operand::RegScaleMasked(_, _, _) | - Operand::RegIndexBaseMasked(_, _, _) | - Operand::RegIndexBaseDispMasked(_, _, _, _) | - Operand::RegScaleDispMasked(_, _, _, _) | - Operand::RegIndexBaseScaleMasked(_, _, _, _) | - Operand::RegIndexBaseScaleDispMasked(_, _, _, _, _) => { + Operand::AbsoluteU16 { .. } | + Operand::AbsoluteU32 { .. } | + Operand::MemDeref { .. } | + Operand::MemDisp { .. } | + Operand::MemIndexScale { .. } | + Operand::MemIndexScaleDisp { .. } | + Operand::MemBaseIndexScale { .. } | + Operand::MemBaseIndexScaleDisp { .. } | + Operand::MemDerefMasked { .. } | + Operand::MemDispMasked { .. } | + Operand::MemIndexScaleMasked { .. } | + Operand::MemIndexScaleDispMasked { .. } | + Operand::MemBaseIndexScaleMasked { .. } | + Operand::MemBaseIndexScaleDispMasked { .. } => { true }, - Operand::ImmediateI8(_) | - Operand::ImmediateU8(_) | - Operand::ImmediateI16(_) | - Operand::ImmediateU16(_) | - Operand::ImmediateU32(_) | - Operand::ImmediateI32(_) | - Operand::Register(_) | - Operand::RegisterMaskMerge(_, _, _) | - Operand::RegisterMaskMergeSae(_, _, _, _) | - Operand::RegisterMaskMergeSaeNoround(_, _, _) | + Operand::ImmediateI8 { .. } | + Operand::ImmediateU8 { .. } | + Operand::ImmediateI16 { .. } | + Operand::ImmediateU16 { .. } | + Operand::ImmediateU32 { .. } | + Operand::ImmediateI32 { .. } | + Operand::Register { .. } | + Operand::RegisterMaskMerge { .. } | + Operand::RegisterMaskMergeSae { .. } | + Operand::RegisterMaskMergeSaeNoround { .. } | Operand::AbsoluteFarAddress { .. 
} | Operand::Nothing => { false @@ -714,22 +700,22 @@ impl Operand { /// `Operand` came from; `None` here means the authoritative width is `instr.mem_size()`. pub fn width(&self) -> Option { match self { - Operand::Register(reg) => { + Operand::Register { reg } => { Some(reg.width()) } - Operand::RegisterMaskMerge(reg, _, _) => { + Operand::RegisterMaskMerge { reg, .. } => { Some(reg.width()) } - Operand::ImmediateI8(_) | - Operand::ImmediateU8(_) => { + Operand::ImmediateI8 { .. } | + Operand::ImmediateU8 { .. } => { Some(1) } - Operand::ImmediateI16(_) | - Operand::ImmediateU16(_) => { + Operand::ImmediateI16 { .. } | + Operand::ImmediateU16 { .. } => { Some(2) } - Operand::ImmediateI32(_) | - Operand::ImmediateU32(_) => { + Operand::ImmediateI32 { .. } | + Operand::ImmediateU32 { .. } => { Some(4) } // memory operands or `Nothing` @@ -747,40 +733,36 @@ impl Operand { Operand::Nothing => { visitor.visit_other() } - Operand::Register(reg) => { + Operand::Register { reg } => { visitor.visit_reg(*reg) } - Operand::RegDeref(reg) => { - visitor.visit_deref(*reg) - } - Operand::RegDisp(reg, disp) => { - visitor.visit_disp(*reg, *disp) - } - Operand::ImmediateI8(imm) => visitor.visit_i8(*imm), - Operand::ImmediateU8(imm) => visitor.visit_u8(*imm), - Operand::ImmediateI16(imm) => visitor.visit_i16(*imm), - Operand::ImmediateU16(imm) => visitor.visit_u16(*imm), - Operand::ImmediateI32(imm) => visitor.visit_i32(*imm), - Operand::ImmediateU32(imm) => visitor.visit_u32(*imm), - Operand::DisplacementU16(disp) => visitor.visit_abs_u16(*disp), - Operand::DisplacementU32(disp) => visitor.visit_abs_u32(*disp), - Operand::RegScale(reg, scale) => visitor.visit_reg_scale(*reg, *scale), - Operand::RegScaleDisp(reg, scale, disp) => visitor.visit_reg_scale_disp(*reg, *scale, *disp), - Operand::RegIndexBase(_, _) => { /* not actually reachable anymore */ visitor.visit_other() }, - Operand::RegIndexBaseDisp(_, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, - 
Operand::RegIndexBaseScale(base, index, scale) => visitor.visit_index_base_scale(*base, *index, *scale), - Operand::RegIndexBaseScaleDisp(base, index, scale, disp) => visitor.visit_index_base_scale_disp(*base, *index, *scale, *disp), - Operand::RegisterMaskMerge(reg, mask, merge) => visitor.visit_reg_mask_merge(*reg, *mask, *merge), - Operand::RegisterMaskMergeSae(reg, mask, merge, sae) => visitor.visit_reg_mask_merge_sae(*reg, *mask, *merge, *sae), - Operand::RegisterMaskMergeSaeNoround(reg, mask, merge) => visitor.visit_reg_mask_merge_sae_noround(*reg, *mask, *merge), - Operand::RegDerefMasked(reg, mask) => visitor.visit_reg_deref_masked(*reg, *mask), - Operand::RegDispMasked(reg, disp, mask) => visitor.visit_reg_disp_masked(*reg, *disp, *mask), - Operand::RegScaleMasked(reg, scale, mask) => visitor.visit_reg_scale_masked(*reg, *scale, *mask), - Operand::RegIndexBaseMasked(_, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, - Operand::RegIndexBaseDispMasked(_, _, _, _) => { /* not actually reachable anymore */ visitor.visit_other() }, - Operand::RegScaleDispMasked(base, scale, disp, mask) => visitor.visit_reg_scale_disp_masked(*base, *scale, *disp, *mask), - Operand::RegIndexBaseScaleMasked(base, index, scale, mask) => visitor.visit_index_base_scale_masked(*base, *index, *scale, *mask), - Operand::RegIndexBaseScaleDispMasked(base, index, scale, disp, mask) => visitor.visit_index_base_scale_disp_masked(*base, *index, *scale, *disp, *mask), + Operand::MemDeref { base } => { + visitor.visit_deref(*base) + } + Operand::MemDisp { base, disp } => { + visitor.visit_disp(*base, *disp) + } + Operand::ImmediateI8 { imm } => visitor.visit_i8(*imm), + Operand::ImmediateU8 { imm } => visitor.visit_u8(*imm), + Operand::ImmediateI16 { imm } => visitor.visit_i16(*imm), + Operand::ImmediateU16 { imm } => visitor.visit_u16(*imm), + Operand::ImmediateI32 { imm } => visitor.visit_i32(*imm), + Operand::ImmediateU32 { imm } => visitor.visit_u32(*imm), + 
Operand::AbsoluteU16 { addr } => visitor.visit_abs_u16(*addr), + Operand::AbsoluteU32 { addr } => visitor.visit_abs_u32(*addr), + Operand::MemIndexScale { index, scale } => visitor.visit_index_scale(*index, *scale), + Operand::MemIndexScaleDisp { index, scale, disp } => visitor.visit_index_scale_disp(*index, *scale, *disp), + Operand::MemBaseIndexScale { base, index, scale } => visitor.visit_base_index_scale(*base, *index, *scale), + Operand::MemBaseIndexScaleDisp { base, index, scale, disp } => visitor.visit_base_index_scale_disp(*base, *index, *scale, *disp), + Operand::RegisterMaskMerge { reg, mask, merge } => visitor.visit_reg_mask_merge(*reg, *mask, *merge), + Operand::RegisterMaskMergeSae { reg, mask, merge, sae } => visitor.visit_reg_mask_merge_sae(*reg, *mask, *merge, *sae), + Operand::RegisterMaskMergeSaeNoround { reg, mask, merge } => visitor.visit_reg_mask_merge_sae_noround(*reg, *mask, *merge), + Operand::MemDerefMasked { base, mask } => visitor.visit_deref_masked(*base, *mask), + Operand::MemDispMasked { base, disp, mask } => visitor.visit_disp_masked(*base, *disp, *mask), + Operand::MemIndexScaleMasked { index, scale, mask } => visitor.visit_index_scale_masked(*index, *scale, *mask), + Operand::MemIndexScaleDispMasked { index, scale, disp, mask } => visitor.visit_index_scale_disp_masked(*index, *scale, *disp, *mask), + Operand::MemBaseIndexScaleMasked { base, index, scale, mask } => visitor.visit_base_index_scale_masked(*base, *index, *scale, *mask), + Operand::MemBaseIndexScaleDispMasked { base, index, scale, disp, mask } => visitor.visit_base_index_scale_disp_masked(*base, *index, *scale, *disp, *mask), Operand::AbsoluteFarAddress { segment, address } => visitor.visit_absolute_far_address(*segment, *address), } } @@ -865,16 +847,16 @@ const REGISTER_CLASS_NAMES: &[&'static str] = &[ /// } /// } /// -/// if let Operand::Register(regspec) = instruction.operand(0) { +/// if let Operand::Register { reg } = instruction.operand(0) { /// 
#[cfg(feature="fmt")] -/// println!("first operand is {}", regspec); -/// show_register_class_info(regspec.class()); +/// println!("first operand is {}", reg); +/// show_register_class_info(reg.class()); /// } /// -/// if let Operand::Register(regspec) = instruction.operand(1) { +/// if let Operand::Register { reg } = instruction.operand(1) { /// #[cfg(feature="fmt")] -/// println!("first operand is {}", regspec); -/// show_register_class_info(regspec.class()); +/// println!("second operand is {}", reg); +/// show_register_class_info(reg.class()); /// } /// ``` /// @@ -2667,17 +2649,17 @@ enum OperandSpec { Deref_di = 0x90, Deref_esi = 0x91, Deref_edi = 0x92, - RegDisp = 0x93, - RegScale = 0x94, - RegScaleDisp = 0x95, - RegIndexBaseScale = 0x96, - RegIndexBaseScaleDisp = 0x97, + Disp = 0x93, + MemIndexScale = 0x94, + MemIndexScaleDisp = 0x95, + MemBaseIndexScale = 0x96, + MemBaseIndexScaleDisp = 0x97, Deref_mask = 0xce, - RegDisp_mask = 0xd3, - RegScale_mask = 0xd4, - RegScaleDisp_mask = 0xd5, - RegIndexBaseScale_mask = 0xd6, - RegIndexBaseScaleDisp_mask = 0xd7, + Disp_mask = 0xd3, + MemIndexScale_mask = 0xd4, + MemIndexScaleDisp_mask = 0xd5, + MemBaseIndexScale_mask = 0xd6, + MemBaseIndexScaleDisp_mask = 0xd7, // u16:u{16,32} immediate address for a far call AbsoluteFarAddress = 0x18, } @@ -4385,7 +4367,7 @@ impl Instruction { // visitor.visit_other() visitor.visit_deref(RegSpec::edi()) } - OperandSpec::RegDisp => { + OperandSpec::Disp => { visitor.visit_disp(self.regs[1], self.disp as i32) } OperandSpec::RegRRR_maskmerge => { @@ -4439,61 +4421,58 @@ impl Instruction { OperandSpec::ImmInDispField => visitor.visit_u16(self.disp as u16), OperandSpec::DispU16 => visitor.visit_abs_u16(self.disp as u16), OperandSpec::DispU32 => visitor.visit_abs_u32(self.disp as u32), - OperandSpec::RegScale => { - visitor.visit_reg_scale(self.regs[2], self.scale) + OperandSpec::MemIndexScale => { + visitor.visit_index_scale(self.regs[2], self.scale) } - OperandSpec::RegScaleDisp => { 
- visitor.visit_reg_scale_disp(self.regs[2], self.scale, self.disp as i32) + OperandSpec::MemIndexScaleDisp => { + visitor.visit_index_scale_disp(self.regs[2], self.scale, self.disp as i32) } - OperandSpec::RegIndexBaseScale => { - visitor.visit_index_base_scale(self.regs[1], self.regs[2], self.scale) - /* - Operand::RegIndexBaseScale(self.regs[1], self.regs[2], self.scale) - */ + OperandSpec::MemBaseIndexScale => { + visitor.visit_base_index_scale(self.regs[1], self.regs[2], self.scale) } - OperandSpec::RegIndexBaseScaleDisp => { - visitor.visit_index_base_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) + OperandSpec::MemBaseIndexScaleDisp => { + visitor.visit_base_index_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) } OperandSpec::Deref_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_reg_deref_masked(self.regs[1], RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_deref_masked(self.regs[1], RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { visitor.visit_deref(self.regs[1]) } } - OperandSpec::RegDisp_mask => { + OperandSpec::Disp_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_reg_disp_masked(self.regs[1], self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_disp_masked(self.regs[1], self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { visitor.visit_disp(self.regs[1], self.disp as i32) } } - OperandSpec::RegScale_mask => { + OperandSpec::MemIndexScale_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_reg_scale_masked(self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_index_scale_masked(self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { - visitor.visit_reg_scale(self.regs[2], self.scale) + visitor.visit_index_scale(self.regs[2], 
self.scale) } } - OperandSpec::RegScaleDisp_mask => { + OperandSpec::MemIndexScaleDisp_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_reg_scale_disp_masked(self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_index_scale_disp_masked(self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { - visitor.visit_reg_scale_disp(self.regs[2], self.scale, self.disp as i32) + visitor.visit_index_scale_disp(self.regs[2], self.scale, self.disp as i32) } } - OperandSpec::RegIndexBaseScale_mask => { + OperandSpec::MemBaseIndexScale_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_index_base_scale_masked(self.regs[1], self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_base_index_scale_masked(self.regs[1], self.regs[2], self.scale, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { - visitor.visit_index_base_scale(self.regs[1], self.regs[2], self.scale) + visitor.visit_base_index_scale(self.regs[1], self.regs[2], self.scale) } } - OperandSpec::RegIndexBaseScaleDisp_mask => { + OperandSpec::MemBaseIndexScaleDisp_mask => { if self.prefixes.evex_unchecked().mask_reg() != 0 { - visitor.visit_index_base_scale_disp_masked(self.regs[1], self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) + visitor.visit_base_index_scale_disp_masked(self.regs[1], self.regs[2], self.scale, self.disp as i32, RegSpec::mask(self.prefixes.evex_unchecked().mask_reg())) } else { - visitor.visit_index_base_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) + visitor.visit_base_index_scale_disp(self.regs[1], self.regs[2], self.scale, self.disp as i32) } } OperandSpec::AbsoluteFarAddress => { @@ -6155,7 +6134,7 @@ fn read_sib< InnerDescription::Misc("mod bits select no base register") .with_id(sib_start + 0) ); 
- OperandSpec::RegScale + OperandSpec::MemIndexScale } else { sink.record( modrm_start + 6, @@ -6163,7 +6142,7 @@ fn read_sib< InnerDescription::RegisterNumber("mod", 0b101, instr.regs[1]) .with_id(sib_start + 0) ); - OperandSpec::RegIndexBaseScale + OperandSpec::MemBaseIndexScale } } } else { @@ -6182,7 +6161,7 @@ fn read_sib< InnerDescription::RegisterNumber("iii", instr.regs[2].num, instr.regs[2]) .with_id(sib_start + 0) ); - OperandSpec::RegIndexBaseScale + OperandSpec::MemBaseIndexScale } } @@ -6216,7 +6195,7 @@ fn read_sib< InnerDescription::RegisterNumber("mod", 0b101, instr.regs[1]) .with_id(sib_start + 0) ); - OperandSpec::RegDisp + OperandSpec::Disp } } else { sink.record( @@ -6232,7 +6211,7 @@ fn read_sib< InnerDescription::Misc("mod bits select no base register, [index+disp] only") .with_id(sib_start + 0) ); - OperandSpec::RegScaleDisp + OperandSpec::MemIndexScaleDisp } else { sink.record( modrm_start + 6, @@ -6240,7 +6219,7 @@ fn read_sib< InnerDescription::RegisterNumber("mod", 0b101, instr.regs[1]) .with_id(sib_start + 0) ); - OperandSpec::RegIndexBaseScaleDisp + OperandSpec::MemBaseIndexScaleDisp } } } else { @@ -6257,7 +6236,7 @@ fn read_sib< InnerDescription::Misc("iii selects no index register") .with_id(sib_start + 0) ); - OperandSpec::RegDisp + OperandSpec::Disp } else { sink.record( sib_start + 3, @@ -6265,7 +6244,7 @@ fn read_sib< InnerDescription::RegisterNumber("iii", instr.regs[2].num, instr.regs[2]) .with_id(sib_start + 0) ); - OperandSpec::RegIndexBaseScaleDisp + OperandSpec::MemBaseIndexScaleDisp } } }; @@ -6375,7 +6354,7 @@ fn read_M_16bit< Ok(OperandSpec::Deref) } else { instr.scale = 1; - Ok(OperandSpec::RegIndexBaseScale) + Ok(OperandSpec::MemBaseIndexScale) } }, 0b01 => { @@ -6396,13 +6375,13 @@ fn read_M_16bit< ); if mmm > 3 { if instr.disp != 0 { - Ok(OperandSpec::RegDisp) + Ok(OperandSpec::Disp) } else { Ok(OperandSpec::Deref) } } else { instr.scale = 1; - Ok(OperandSpec::RegIndexBaseScaleDisp) + 
Ok(OperandSpec::MemBaseIndexScaleDisp) } }, 0b10 => { @@ -6423,13 +6402,13 @@ fn read_M_16bit< ); if mmm > 3 { if instr.disp != 0 { - Ok(OperandSpec::RegDisp) + Ok(OperandSpec::Disp) } else { Ok(OperandSpec::Deref) } } else { instr.scale = 1; - Ok(OperandSpec::RegIndexBaseScaleDisp) + Ok(OperandSpec::MemBaseIndexScaleDisp) } }, _ => { @@ -6525,7 +6504,7 @@ fn read_M< OperandSpec::Deref } else { instr.disp = disp as i32 as u32; - OperandSpec::RegDisp + OperandSpec::Disp } } }; diff --git a/test/long_mode/operand.rs b/test/long_mode/operand.rs index a47e6c8..0faa1c3 100644 --- a/test/long_mode/operand.rs +++ b/test/long_mode/operand.rs @@ -3,19 +3,19 @@ use yaxpeax_x86::MemoryAccessSize; #[test] fn register_widths() { - assert_eq!(Operand::Register(RegSpec::rsp()).width(), Some(8)); - assert_eq!(Operand::Register(RegSpec::esp()).width(), Some(4)); - assert_eq!(Operand::Register(RegSpec::sp()).width(), Some(2)); - assert_eq!(Operand::Register(RegSpec::cl()).width(), Some(1)); - assert_eq!(Operand::Register(RegSpec::ch()).width(), Some(1)); - assert_eq!(Operand::Register(RegSpec::gs()).width(), Some(2)); + assert_eq!(Operand::Register { reg: RegSpec::rsp() }.width(), Some(8)); + assert_eq!(Operand::Register { reg: RegSpec::esp() }.width(), Some(4)); + assert_eq!(Operand::Register { reg: RegSpec::sp() }.width(), Some(2)); + assert_eq!(Operand::Register { reg: RegSpec::cl() }.width(), Some(1)); + assert_eq!(Operand::Register { reg: RegSpec::ch() }.width(), Some(1)); + assert_eq!(Operand::Register { reg: RegSpec::gs() }.width(), Some(2)); } #[test] fn memory_widths() { // the register operand directly doesn't report a size - it comes from the `Instruction` for // which this is an operand. - assert_eq!(Operand::RegDeref(RegSpec::rsp()).width(), None);
+ assert_eq!(Operand::MemDeref { base: RegSpec::rsp() }.width(), None); fn mem_size_of(data: &[u8]) -> MemoryAccessSize { let decoder = InstDecoder::default(); diff --git a/test/protected_mode/operand.rs b/test/protected_mode/operand.rs index 6eb9ba5..78a34b4 100644 --- a/test/protected_mode/operand.rs +++ b/test/protected_mode/operand.rs @@ -3,18 +3,18 @@ use yaxpeax_x86::MemoryAccessSize; #[test] fn register_widths() { - assert_eq!(Operand::Register(RegSpec::esp()).width(), Some(4)); - assert_eq!(Operand::Register(RegSpec::sp()).width(), Some(2)); - assert_eq!(Operand::Register(RegSpec::cl()).width(), Some(1)); - assert_eq!(Operand::Register(RegSpec::ch()).width(), Some(1)); - assert_eq!(Operand::Register(RegSpec::gs()).width(), Some(2)); + assert_eq!(Operand::Register { reg: RegSpec::esp() }.width(), Some(4)); + assert_eq!(Operand::Register { reg: RegSpec::sp() }.width(), Some(2)); + assert_eq!(Operand::Register { reg: RegSpec::cl() }.width(), Some(1)); + assert_eq!(Operand::Register { reg: RegSpec::ch() }.width(), Some(1)); + assert_eq!(Operand::Register { reg: RegSpec::gs() }.width(), Some(2)); } #[test] fn memory_widths() { // the register operand directly doesn't report a size - it comes from the `Instruction` for // which this is an operand. 
- assert_eq!(Operand::RegDeref(RegSpec::esp()).width(), None); + assert_eq!(Operand::MemDeref { base: RegSpec::esp() }.width(), None); fn mem_size_of(data: &[u8]) -> MemoryAccessSize { let decoder = InstDecoder::default(); From f4ae2edd1a9733eee8b33a7937d07054256533a2 Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 24 Jun 2024 12:53:31 -0700 Subject: [PATCH 89/95] one more stray docs error --- src/long_mode/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 6dabd6d..0ce6a58 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -931,7 +931,7 @@ const REGISTER_CLASS_NAMES: &[&'static str] = &[ /// /// if let Operand::Register { reg } = instruction.operand(1) { /// #[cfg(feature="fmt")] -/// println!("first operand is {}", reg); +/// println!("second operand is {}", reg); /// show_register_class_info(reg.class()); /// } /// ``` From ddde47c4c8c2058379b448894bebb3e099ea0585 Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 24 Jun 2024 13:51:12 -0700 Subject: [PATCH 90/95] consistently enter register/number/opcode spans --- src/long_mode/display.rs | 139 +++++++++++++--------------------- src/protected_mode/display.rs | 123 +++++++++++------------------- src/real_mode/display.rs | 113 ++++++++++----------------- 3 files changed, 138 insertions(+), 237 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 469bb90..1193f35 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -65,6 +65,8 @@ trait DisplaySinkExt { fn write_opcode(&mut self, opcode: super::Opcode) -> Result<(), core::fmt::Error>; fn write_reg(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error>; + fn write_displacement(&mut self, disp: i32) -> Result<(), core::fmt::Error>; + fn write_scale(&mut self, scale: u8) -> Result<(), core::fmt::Error>; fn write_mem_size_label(&mut self, mem_size: u8) -> Result<(), core::fmt::Error>; fn write_sae_mode(&mut self, sae: super::SaeMode) 
-> Result<(), core::fmt::Error>; } @@ -72,20 +74,49 @@ trait DisplaySinkExt { impl DisplaySinkExt for T { #[inline(always)] fn write_opcode(&mut self, opcode: super::Opcode) -> Result<(), core::fmt::Error> { + self.span_start_opcode(); let name = opcode.name(); let _ = Self::MNEMONIC_LT_32; // Safety: all opcode mnemonics are 31 bytes or fewer. - unsafe { self.write_lt_32(name) } + unsafe { self.write_lt_32(name)?; } + self.span_end_opcode(); + Ok(()) } #[inline(always)] fn write_reg(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error> { + self.span_start_register(); let label = regspec_label(&reg); let _ = Self::REG_LABEL_LT_8; // Safety: all register labels are 7 bytes or fewer. - unsafe { self.write_lt_8(label) } + unsafe { self.write_lt_8(label)?; } + self.span_end_register(); + Ok(()) + } + + #[inline(always)] + fn write_displacement(&mut self, disp: i32) -> Result<(), core::fmt::Error> { + let mut v = disp as u32; + if disp < 0 { + self.write_fixed_size("- ")?; + v = disp.unsigned_abs(); + } else { + self.write_fixed_size("+ ")?; + } + self.span_start_number(); + self.write_prefixed_u32(v)?; + self.span_end_number(); + Ok(()) + } + + #[inline(always)] + fn write_scale(&mut self, scale: u8) -> Result<(), core::fmt::Error> { + self.span_start_number(); + self.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.span_end_number(); + Ok(()) } #[inline(always)] @@ -367,20 +398,14 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_reg(&mut self, reg: RegSpec) -> Result { - self.f.span_start_register(); self.f.write_reg(reg)?; - self.f.span_end_register(); Ok(()) } fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { - self.f.span_start_register(); self.f.write_reg(spec)?; - self.f.span_end_register(); if mask.num != 0 { self.f.write_fixed_size("{")?; - self.f.span_start_register(); self.f.write_reg(mask)?;
- self.f.span_end_register(); self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { @@ -416,15 +441,17 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } fn visit_abs_u32(&mut self, imm: u32) -> Result { self.f.write_fixed_size("[")?; - self.f.write_fixed_size("0x")?; - self.f.write_u32(imm)?; + self.f.span_start_address(); + self.f.write_prefixed_u32(imm)?; + self.f.span_end_address(); self.f.write_fixed_size("]")?; Ok(()) } fn visit_abs_u64(&mut self, imm: u64) -> Result { self.f.write_fixed_size("[")?; - self.f.write_fixed_size("0x")?; - self.f.write_u64(imm)?; + self.f.span_start_address(); + self.f.write_prefixed_u64(imm)?; + self.f.span_end_address(); self.f.write_fixed_size("]")?; Ok(()) } @@ -434,17 +461,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('[')?; self.f.write_reg(base)?; self.f.write_fixed_size(" ")?; - - { - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; - } + self.f.write_displacement(disp)?; self.f.write_fixed_size("]") } fn visit_deref(&mut self, base: RegSpec) -> Result { @@ -456,7 +473,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size("[")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; Ok(()) @@ -465,19 +482,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size("[")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; - - { - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } 
else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; - } + self.f.write_displacement(disp)?; self.f.write_char(']') } fn visit_base_index_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { @@ -486,7 +493,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size("]") } fn visit_base_index_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { @@ -495,33 +502,16 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; - - { - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; - } + self.f.write_displacement(disp)?; self.f.write_fixed_size("]") } fn visit_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_char('[')?; self.f.write_reg(base)?; self.f.write_char(' ')?; - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; + self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -541,7 +531,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size("[")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + 
self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -552,16 +542,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size("[")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; + self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -585,14 +568,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" ")?; - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; + self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -605,7 +581,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -618,16 +594,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_char(' ')?; - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; 
- v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; + self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 1629b4e..db12878 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -65,6 +65,8 @@ trait DisplaySinkExt { fn write_opcode(&mut self, opcode: super::Opcode) -> Result<(), core::fmt::Error>; fn write_reg(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error>; + fn write_displacement(&mut self, disp: i32) -> Result<(), core::fmt::Error>; + fn write_scale(&mut self, scale: u8) -> Result<(), core::fmt::Error>; fn write_mem_size_label(&mut self, mem_size: u8) -> Result<(), core::fmt::Error>; fn write_sae_mode(&mut self, sae: super::SaeMode) -> Result<(), core::fmt::Error>; } @@ -88,6 +90,29 @@ impl DisplaySinkExt for T { unsafe { self.write_lt_8(label) } } + #[inline(always)] + fn write_displacement(&mut self, disp: i32) -> Result<(), core::fmt::Error> { + let mut v = disp as u32; + if disp < 0 { + self.write_fixed_size("- ")?; + v = disp.unsigned_abs(); + } else { + self.write_fixed_size("+ ")?; + } + self.span_start_number(); + self.write_prefixed_u32(v)?; + self.span_end_number(); + Ok(()) + } + + #[inline(always)] + fn write_scale(&mut self, scale: u8) -> Result<(), core::fmt::Error> { + self.span_start_number(); + self.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.span_end_number(); + Ok(()) + } + #[inline(always)] fn write_mem_size_label(&mut self, mem_size: u8) -> Result<(), core::fmt::Error> { let label = mem_size_label(mem_size); @@ -343,20 +368,14 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_reg(&mut self, reg: RegSpec) -> Result { - self.f.span_start_register(); self.f.write_reg(reg)?; - 
self.f.span_end_register(); Ok(()) } fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { - self.f.span_start_register(); self.f.write_reg(spec)?; - self.f.span_end_register(); if mask.num != 0 { self.f.write_fixed_size("{")?; - self.f.span_start_register(); self.f.write_reg(mask)?; - self.f.span_end_register(); self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { @@ -392,15 +411,19 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } fn visit_abs_u16(&mut self, imm: u16) -> Result { self.f.write_fixed_size("[")?; + self.f.span_start_address(); self.f.write_fixed_size("0x")?; self.f.write_u16(imm)?; + self.f.span_end_address(); self.f.write_fixed_size("]")?; Ok(()) } fn visit_abs_u32(&mut self, imm: u32) -> Result { self.f.write_fixed_size("[")?; + self.f.span_start_address(); self.f.write_fixed_size("0x")?; self.f.write_u32(imm)?; + self.f.span_end_address(); self.f.write_fixed_size("]")?; Ok(()) } @@ -410,17 +433,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('[')?; self.f.write_reg(base)?; self.f.write_fixed_size(" ")?; - - { - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; - } + self.f.write_displacement(disp)?; self.f.write_fixed_size("]") } fn visit_deref(&mut self, base: RegSpec) -> Result { @@ -432,7 +445,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size("[")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; Ok(()) @@ -441,19 +454,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size("[")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - 
self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; - - { - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; - } + self.f.write_displacement(disp)?; self.f.write_char(']') } fn visit_base_index_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { @@ -462,7 +465,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size("]") } fn visit_base_index_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { @@ -471,33 +474,16 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; - - { - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; - } + self.f.write_displacement(disp)?; self.f.write_fixed_size("]") } fn visit_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_char('[')?; self.f.write_reg(base)?; self.f.write_char(' ')?; - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; + self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -517,7 
+503,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size("[")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -528,16 +514,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size("[")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; + self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -561,14 +540,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" ")?; - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; + self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -581,7 +553,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -594,16 +566,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; 
self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_char(' ')?; - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; + self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index bea11fc..e8bd191 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -65,6 +65,8 @@ trait DisplaySinkExt { fn write_opcode(&mut self, opcode: super::Opcode) -> Result<(), core::fmt::Error>; fn write_reg(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error>; + fn write_displacement(&mut self, disp: i32) -> Result<(), core::fmt::Error>; + fn write_scale(&mut self, scale: u8) -> Result<(), core::fmt::Error>; fn write_mem_size_label(&mut self, mem_size: u8) -> Result<(), core::fmt::Error>; fn write_sae_mode(&mut self, sae: super::SaeMode) -> Result<(), core::fmt::Error>; } @@ -88,6 +90,29 @@ impl DisplaySinkExt for T { unsafe { self.write_lt_8(label) } } + #[inline(always)] + fn write_displacement(&mut self, disp: i32) -> Result<(), core::fmt::Error> { + let mut v = disp as u32; + if disp < 0 { + self.write_fixed_size("- ")?; + v = disp.unsigned_abs(); + } else { + self.write_fixed_size("+ ")?; + } + self.span_start_number(); + self.write_prefixed_u32(v)?; + self.span_end_number(); + Ok(()) + } + + #[inline(always)] + fn write_scale(&mut self, scale: u8) -> Result<(), core::fmt::Error> { + self.span_start_number(); + self.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.span_end_number(); + Ok(()) + } + #[inline(always)] fn write_mem_size_label(&mut self, mem_size: u8) -> Result<(), core::fmt::Error> { let label = mem_size_label(mem_size); @@ -410,17 +435,7 @@ 
impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_char('[')?; self.f.write_reg(base)?; self.f.write_fixed_size(" ")?; - - { - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; - } + self.f.write_displacement(disp)?; self.f.write_fixed_size("]") } fn visit_deref(&mut self, base: RegSpec) -> Result { @@ -432,7 +447,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size("[")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; Ok(()) @@ -441,19 +456,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size("[")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; - - { - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; - } + self.f.write_displacement(disp)?; self.f.write_char(']') } fn visit_base_index_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { @@ -462,7 +467,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size("]") } fn visit_base_index_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { @@ -471,33 +476,16 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> 
self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; - - { - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; - } + self.f.write_displacement(disp)?; self.f.write_fixed_size("]") } fn visit_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_char('[')?; self.f.write_reg(base)?; self.f.write_char(' ')?; - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; + self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -517,7 +505,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size("[")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -528,16 +516,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size("[")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; + self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -561,14 +542,7 @@ 
impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" ")?; - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; + self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -581,7 +555,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; @@ -594,16 +568,9 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> self.f.write_fixed_size(" + ")?; self.f.write_reg(index)?; self.f.write_fixed_size(" * ")?; - self.f.write_char((0x30 + scale) as char)?; // translate scale=1 to '1', scale=2 to '2', etc + self.f.write_scale(scale)?; self.f.write_char(' ')?; - let mut v = disp as u32; - if disp < 0 { - self.f.write_fixed_size("- 0x")?; - v = disp.unsigned_abs(); - } else { - self.f.write_fixed_size("+ 0x")?; - } - self.f.write_u32(v)?; + self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; self.f.write_reg(mask_reg)?; From dd8bd5ce0772b08c271205508e48e98ef1c58ea8 Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 24 Jun 2024 14:06:22 -0700 Subject: [PATCH 91/95] justify the current max instruction length this is also checked by a new fuzz target --- fuzz/Cargo.toml | 6 +++ .../instruction_text_buffer_size_ok.rs | 51 +++++++++++++++++++ src/lib.rs | 36 +++++++++++++ src/long_mode/display.rs | 4 +- src/protected_mode/display.rs | 4 +- src/real_mode/display.rs | 4 +- 6 files changed, 96 insertions(+), 9 deletions(-) create mode 100644 
fuzz/fuzz_targets/instruction_text_buffer_size_ok.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 2203dc3..a1f871e 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -35,6 +35,12 @@ path = "fuzz_targets/displaysink_used_correctly.rs" test = false doc = false +[[bin]] +name = "instruction_text_buffer_size_ok" +path = "fuzz_targets/instruction_text_buffer_size_ok.rs" +test = false +doc = false + [[bin]] name = "display_c_does_not_panic" path = "fuzz_targets/display_c_does_not_panic.rs" diff --git a/fuzz/fuzz_targets/instruction_text_buffer_size_ok.rs b/fuzz/fuzz_targets/instruction_text_buffer_size_ok.rs new file mode 100644 index 0000000..2c88424 --- /dev/null +++ b/fuzz/fuzz_targets/instruction_text_buffer_size_ok.rs @@ -0,0 +1,51 @@ +#![no_main] +#[macro_use] extern crate libfuzzer_sys; +extern crate yaxpeax_x86; +extern crate yaxpeax_arch; + +use std::fmt::Write; + +fuzz_target!(|data: &[u8]| { + let x86_64_decoder = yaxpeax_x86::long_mode::InstDecoder::default(); + let x86_32_decoder = yaxpeax_x86::protected_mode::InstDecoder::default(); + let x86_16_decoder = yaxpeax_x86::real_mode::InstDecoder::default(); + + if let Ok(inst) = x86_64_decoder.decode_slice(data) { + use yaxpeax_x86::long_mode::DisplayStyle; + + let mut s = String::new(); + write!(s, "{}", inst.display_with(DisplayStyle::Intel)).expect("can write"); + // MAX_INSTRUCTION_LEN is not a public crate item yet... + assert!(s.len() < 512); + s.clear(); + write!(s, "{}", inst.display_with(DisplayStyle::C)).expect("can write"); + // MAX_INSTRUCTION_LEN is not a public crate item yet... + assert!(s.len() < 512); + }; + + if let Ok(inst) = x86_32_decoder.decode_slice(data) { + use yaxpeax_x86::protected_mode::DisplayStyle; + + let mut s = String::new(); + write!(s, "{}", inst.display_with(DisplayStyle::Intel)).expect("can write"); + // MAX_INSTRUCTION_LEN is not a public crate item yet... 
+ assert!(s.len() < 512); + s.clear(); + write!(s, "{}", inst.display_with(DisplayStyle::C)).expect("can write"); + // MAX_INSTRUCTION_LEN is not a public crate item yet... + assert!(s.len() < 512); + }; + + if let Ok(inst) = x86_16_decoder.decode_slice(data) { + use yaxpeax_x86::real_mode::DisplayStyle; + + let mut s = String::new(); + write!(s, "{}", inst.display_with(DisplayStyle::Intel)).expect("can write"); + // MAX_INSTRUCTION_LEN is not a public crate item yet... + assert!(s.len() < 512); + s.clear(); + write!(s, "{}", inst.display_with(DisplayStyle::C)).expect("can write"); + // MAX_INSTRUCTION_LEN is not a public crate item yet... + assert!(s.len() < 512); + }; +}); diff --git a/src/lib.rs b/src/lib.rs index 7ab6cb8..93274f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -138,6 +138,42 @@ pub use protected_mode::Arch as x86_32; pub mod real_mode; pub use real_mode::Arch as x86_16; +// this exists to size `InstructionTextBuffer`'s buffer. it ideally would come from an `Arch` +// impl, or something related to `Arch`, but i'm not yet sure how to wire that up into +// yaxpeax-arch. so instead calculate an appropriate max size for all of 16-bit/32-bit/64-bit +// instruction printing that `InstructionTextBuffer` can be used for. +// +// `InstructionTextBuffer` prints an `InstructionDisplayer`, which means either intel syntax or +// pseudo-C. in the future, at&t probably, as well. +// +// the pseudo-C syntax's max length would be something like: +// ``` +// xacquire xrelease lock { repnz qword if /* signed */ greater_or_equal(rflags) then jmp gs:[xmm31 + +// xmm31 * 8 + 0x12345678]{k7}{z}{rne-sae} } +// ``` +// (which is nonsensical) or for an unknown opcode, +// ``` +// xacquire xrelease lock { op0 = op(op0, op1, op2, op3) } +// ``` +// where `opN` is an operand. 
the longest operand, same as above, would be something like +// ``` +// gs:[xmm31 + xmm31 * 8 + 0x12345678]{k7}{z}{rne-sae} +// ``` +// for a length like 262 bytes of operand, 55 bytes of prefixes and syntax, and another up-to-20 +// bytes of opcode. +// +// the longest contextualize_c might write is around 337 bytes. round up to 512 because it's.. not +// much extra. +// +// the same reasoning for intel syntax yields a smaller instruction: +// ``` +// xacquire xrelease lock op op1, op2, op3, op4 +// ``` +// where the longest operands are the same as above. this comes out to closer to 307 bytes. 512 +// bytes is still the longest of the two options. +#[allow(dead_code)] // can be an unused constant in some library configurations +const MAX_INSTRUCTION_LEN: usize = 512; + const MEM_SIZE_STRINGS: [&'static str; 65] = [ "BUG", "byte", "word", "BUG", "dword", "ptr", "far", "BUG", "qword", diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 1193f35..89d952b 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -4410,9 +4410,7 @@ mod buffer_sink { /// settings format instructions identically to their corresponding `fmt::Display`. pub fn new() -> Self { let mut buf = alloc::string::String::new(); - // TODO: move 512 out to a MAX_INSTRUCTION_LEN const and appropriate justification (and - // fuzzing and ..) - buf.reserve(512); + buf.reserve(crate::MAX_INSTRUCTION_LEN); Self { content: buf, } diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index db12878..321b5b5 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -2955,9 +2955,7 @@ mod buffer_sink { /// settings format instructions identically to their corresponding `fmt::Display`. pub fn new() -> Self { let mut buf = alloc::string::String::new(); - // TODO: move 512 out to a MAX_INSTRUCTION_LEN const and appropriate justification (and - // fuzzing and ..) 
- buf.reserve(512); + buf.reserve(crate::MAX_INSTRUCTION_LEN); Self { content: buf, } diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index e8bd191..669b8d7 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -2957,9 +2957,7 @@ mod buffer_sink { /// settings format instructions identically to their corresponding `fmt::Display`. pub fn new() -> Self { let mut buf = alloc::string::String::new(); - // TODO: move 512 out to a MAX_INSTRUCTION_LEN const and appropriate justification (and - // fuzzing and ..) - buf.reserve(512); + buf.reserve(crate::MAX_INSTRUCTION_LEN); Self { content: buf, } From 42f29e3f6194166ae954acee4591ee23d5d6494c Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 24 Jun 2024 14:29:25 -0700 Subject: [PATCH 92/95] bump cargo version to 2.0.0, not quite releasing yet --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 907a70b..7b52dd9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "yaxpeax-x86" -version = "1.2.2" +version = "2.0.0" authors = [ "iximeow " ] license = "0BSD" repository = "http://git.iximeow.net/yaxpeax-x86/" From 016583f3f237938816a819a882510e9a57a141ad Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 24 Jun 2024 14:32:47 -0700 Subject: [PATCH 93/95] bench: fetch from fork updated for yaxpeax-x86 2.0.0 --- goodfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goodfile b/goodfile index 37db819..cf93b86 100644 --- a/goodfile +++ b/goodfile @@ -28,7 +28,7 @@ Build.artifact(sopath) -- now run some perf numbers... 
Step.start("perf") -Build.run({"git", "clone", "https://github.com/athre0z/disas-bench.git", "disas-bench"}) +Build.run({"git", "clone", "https://github.com/iximeow/disas-bench.git", "disas-bench"}) Build.run({"git", "submodule", "update", "--recursive", "--init"}, {cwd="disas-bench"}) Build.run({"git", "remote", "add", "dev", "../../.."}, {cwd="disas-bench/libs/yaxpeax"}) Build.run({"git", "fetch", "-a", "dev"}, {cwd="disas-bench/libs/yaxpeax"}) From 6a5ea107475284756070614a566970fbb383c4e6 Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 24 Jun 2024 15:14:16 -0700 Subject: [PATCH 94/95] add missing feature flag to real-mode ffi library ffi/ still needs... much more work --- ffi/real_mode/Cargo.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ffi/real_mode/Cargo.toml b/ffi/real_mode/Cargo.toml index c079e0e..03b5573 100644 --- a/ffi/real_mode/Cargo.toml +++ b/ffi/real_mode/Cargo.toml @@ -11,4 +11,9 @@ yaxpeax-arch = { version = "0.3.1", default-features = false } [lib] name = "yaxpeax_x86_ffi_real_mode" path = "src/lib.rs" -crate-type = ["staticlib"] +crate-type = ["staticlib", "cdylib"] + +[features] +default = ["fmt"] + +fmt = ["yaxpeax-x86/fmt"] From 24b33d5fdc9513c1b46e99b526d21e0a7b5eea38 Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 24 Jun 2024 15:21:56 -0700 Subject: [PATCH 95/95] document one more stray unsafe --- src/long_mode/display.rs | 10 ++++++---- src/protected_mode/display.rs | 10 ++++++---- src/real_mode/display.rs | 10 ++++++---- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 89d952b..b9023ed 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -9,6 +9,7 @@ use crate::long_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruc use yaxpeax_arch::display::DisplaySink; use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; +use yaxpeax_arch::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; 
 trait DisplaySinkExt { // `write_opcode` depends on all mnemonics being less than 32 bytes long. check that here, at @@ -3693,15 +3694,16 @@ pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut // don't worry about checking for `instr.operands[i] != Nothing`, it would be a bug to // reach that while iterating only to `operand_count`.. out.write_fixed_size(", ")?; + // hint that accessing `instr.operands[i]` can't panic: this is useful for + // `instr.operands` and the segment selector check after. if i >= 4 { - unsafe { core::hint::unreachable_unchecked(); } + // Safety: Instruction::operands is a four-element array; operand_count is always + // low enough that 0..operand_count is a valid index. + unsafe { unreachable_unchecked(); } } if instr.operands[i as usize].is_memory() { out.write_mem_size_label(instr.mem_size)?; - if i >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } if let Some(prefix) = instr.segment_override_for_op(i) { let name = prefix.name(); out.write_char(' ')?; diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 321b5b5..8124337 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -9,6 +9,7 @@ use crate::protected_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, In use yaxpeax_arch::display::DisplaySink; use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; +use yaxpeax_arch::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; trait DisplaySinkExt { // `write_opcode` depends on all mnemonics being less than 32 bytes long. check that here, at @@ -2249,15 +2250,16 @@ pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut // don't worry about checking for `instr.operands[i] != Nothing`, it would be a bug to // reach that while iterating only to `operand_count`.. out.write_fixed_size(", ")?; + // hint that accessing `instr.operands[i]` can't panic: this is useful for + // `instr.operands` and the segment selector check after.
 if i >= 4 { - unsafe { core::hint::unreachable_unchecked(); } + // Safety: Instruction::operands is a four-element array; operand_count is always + // low enough that 0..operand_count is a valid index. + unsafe { unreachable_unchecked(); } } if instr.operands[i as usize].is_memory() { out.write_mem_size_label(instr.mem_size)?; - if i >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } if let Some(prefix) = instr.segment_override_for_op(i) { let name = prefix.name(); out.write_char(' ')?; diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index 669b8d7..9607e9d 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -9,6 +9,7 @@ use crate::real_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruc use yaxpeax_arch::display::DisplaySink; use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; +use yaxpeax_arch::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; trait DisplaySinkExt { // `write_opcode` depends on all mnemonics being less than 32 bytes long. check that here, at @@ -2251,15 +2252,16 @@ pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut // don't worry about checking for `instr.operands[i] != Nothing`, it would be a bug to // reach that while iterating only to `operand_count`.. out.write_fixed_size(", ")?; + // hint that accessing `instr.operands[i]` can't panic: this is useful for + // `instr.operands` and the segment selector check after. if i >= 4 { - unsafe { core::hint::unreachable_unchecked(); } + // Safety: Instruction::operands is a four-element array; operand_count is always + // low enough that 0..operand_count is a valid index. + unsafe { unreachable_unchecked(); } } if instr.operands[i as usize].is_memory() { out.write_mem_size_label(instr.mem_size)?; - if i >= 4 { - unsafe { core::hint::unreachable_unchecked(); } - } if let Some(prefix) = instr.segment_override_for_op(i) { let name = prefix.name(); out.write_char(' ')?;