// Copyright 2014 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package x86asm implements decoding of x86 machine code. package x86asm import ( "bytes" "fmt" ) // An Inst is a single instruction. type Inst struct { Prefix Prefixes // Prefixes applied to the instruction. Op Op // Opcode mnemonic Opcode uint32 // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc) Args Args // Instruction arguments, in Intel order Mode int // processor mode in bits: 16, 32, or 64 AddrSize int // address size in bits: 16, 32, or 64 DataSize int // operand size in bits: 16, 32, or 64 MemBytes int // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on. Len int // length of encoded instruction in bytes PCRel int // length of PC-relative address in instruction encoding PCRelOff int // index of start of PC-relative address in instruction encoding } // Prefixes is an array of prefixes associated with a single instruction. // The prefixes are listed in the same order as found in the instruction: // each prefix byte corresponds to one slot in the array. The first zero // in the array marks the end of the prefixes. type Prefixes [14]Prefix // A Prefix represents an Intel instruction prefix. // The low 8 bits are the actual prefix byte encoding, // and the top 8 bits contain distinguishing bits and metadata. type Prefix uint16 const ( // Metadata about the role of a prefix in an instruction. PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text PrefixIgnored Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix PrefixInvalid Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK) // Memory segment overrides. PrefixES Prefix = 0x26 // ES segment override PrefixCS Prefix = 0x2E // CS segment override PrefixSS Prefix = 0x36 // SS segment override PrefixDS Prefix = 0x3E // DS segment override PrefixFS Prefix = 0x64 // FS segment override PrefixGS Prefix = 0x65 // GS segment override // Branch prediction. PrefixPN Prefix = 0x12E // predict not taken (conditional branch only) PrefixPT Prefix = 0x13E // predict taken (conditional branch only) // Size attributes. PrefixDataSize Prefix = 0x66 // operand size override PrefixData16 Prefix = 0x166 PrefixData32 Prefix = 0x266 PrefixAddrSize Prefix = 0x67 // address size override PrefixAddr16 Prefix = 0x167 PrefixAddr32 Prefix = 0x267 // One of a kind. PrefixLOCK Prefix = 0xF0 // lock PrefixREPN Prefix = 0xF2 // repeat not zero PrefixXACQUIRE Prefix = 0x1F2 PrefixBND Prefix = 0x2F2 PrefixREP Prefix = 0xF3 // repeat PrefixXRELEASE Prefix = 0x1F3 // The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10). // the other bits are set or not according to the intended use. PrefixREX Prefix = 0x40 // REX 64-bit extension prefix PrefixREXW Prefix = 0x08 // extension bit W (64-bit instruction width) PrefixREXR Prefix = 0x04 // extension bit R (r field in modrm) PrefixREXX Prefix = 0x02 // extension bit X (index field in sib) PrefixREXB Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib) PrefixVEX2Bytes Prefix = 0xC5 // Short form of vex prefix PrefixVEX3Bytes Prefix = 0xC4 // Long form of vex prefix ) // IsREX reports whether p is a REX prefix byte. func (p Prefix) IsREX() bool { return p&0xF0 == PrefixREX } func (p Prefix) IsVEX() bool { return p&0xFF == PrefixVEX2Bytes || p&0xFF == PrefixVEX3Bytes } func (p Prefix) String() string { p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid if s := prefixNames[p]; s != "" { return s } if p.IsREX() { s := "REX." if p&PrefixREXW != 0 { s += "W" } if p&PrefixREXR != 0 { s += "R" } if p&PrefixREXX != 0 { s += "X" } if p&PrefixREXB != 0 { s += "B" } return s } return fmt.Sprintf("Prefix(%#x)", int(p)) } // An Op is an x86 opcode. type Op uint32 func (op Op) String() string { i := int(op) if i < 0 || i >= len(opNames) || opNames[i] == "" { return fmt.Sprintf("Op(%d)", i) } return opNames[i] } // An Args holds the instruction arguments. // If an instruction has fewer than 4 arguments, // the final elements in the array are nil. type Args [4]Arg // An Arg is a single instruction argument, // one of these types: Reg, Mem, Imm, Rel. type Arg interface { String() string isArg() } // Note that the implements of Arg that follow are all sized // so that on a 64-bit machine the data can be inlined in // the interface value instead of requiring an allocation. // A Reg is a single register. // The zero Reg value has no name but indicates “no register.” type Reg uint8 const ( _ Reg = iota // 8-bit AL CL DL BL AH CH DH BH SPB BPB SIB DIB R8B R9B R10B R11B R12B R13B R14B R15B // 16-bit AX CX DX BX SP BP SI DI R8W R9W R10W R11W R12W R13W R14W R15W // 32-bit EAX ECX EDX EBX ESP EBP ESI EDI R8L R9L R10L R11L R12L R13L R14L R15L // 64-bit RAX RCX RDX RBX RSP RBP RSI RDI R8 R9 R10 R11 R12 R13 R14 R15 // Instruction pointer. IP // 16-bit EIP // 32-bit RIP // 64-bit // 387 floating point registers. F0 F1 F2 F3 F4 F5 F6 F7 // MMX registers. M0 M1 M2 M3 M4 M5 M6 M7 // XMM registers. X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 // Segment registers. ES CS SS DS FS GS // System registers. GDTR IDTR LDTR MSW TASK // Control registers. CR0 CR1 CR2 CR3 CR4 CR5 CR6 CR7 CR8 CR9 CR10 CR11 CR12 CR13 CR14 CR15 // Debug registers. DR0 DR1 DR2 DR3 DR4 DR5 DR6 DR7 DR8 DR9 DR10 DR11 DR12 DR13 DR14 DR15 // Task registers. TR0 TR1 TR2 TR3 TR4 TR5 TR6 TR7 ) const regMax = TR7 func (Reg) isArg() {} func (r Reg) String() string { i := int(r) if i < 0 || i >= len(regNames) || regNames[i] == "" { return fmt.Sprintf("Reg(%d)", i) } return regNames[i] } // A Mem is a memory reference. // The general form is Segment:[Base+Scale*Index+Disp]. type Mem struct { Segment Reg Base Reg Scale uint8 Index Reg Disp int64 } func (Mem) isArg() {} func (m Mem) String() string { var base, plus, scale, index, disp string if m.Base != 0 { base = m.Base.String() } if m.Scale != 0 { if m.Base != 0 { plus = "+" } if m.Scale > 1 { scale = fmt.Sprintf("%d*", m.Scale) } index = m.Index.String() } if m.Disp != 0 || m.Base == 0 && m.Scale == 0 { disp = fmt.Sprintf("%+#x", m.Disp) } return "[" + base + plus + scale + index + disp + "]" } // A Rel is an offset relative to the current instruction pointer. type Rel int32 func (Rel) isArg() {} func (r Rel) String() string { return fmt.Sprintf(".%+d", r) } // An Imm is an integer constant. type Imm int64 func (Imm) isArg() {} func (i Imm) String() string { return fmt.Sprintf("%#x", int64(i)) } func (i Inst) String() string { var buf bytes.Buffer for _, p := range i.Prefix { if p == 0 { break } if p&PrefixImplicit != 0 { continue } fmt.Fprintf(&buf, "%v ", p) } fmt.Fprintf(&buf, "%v", i.Op) sep := " " for _, v := range i.Args { if v == nil { break } fmt.Fprintf(&buf, "%s%v", sep, v) sep = ", " } return buf.String() } func isReg(a Arg) bool { _, ok := a.(Reg) return ok } func isSegReg(a Arg) bool { r, ok := a.(Reg) return ok && ES <= r && r <= GS } func isMem(a Arg) bool { _, ok := a.(Mem) return ok } func isImm(a Arg) bool { _, ok := a.(Imm) return ok } func regBytes(a Arg) int { r, ok := a.(Reg) if !ok { return 0 } if AL <= r && r <= R15B { return 1 } if AX <= r && r <= R15W { return 2 } if EAX <= r && r <= R15L { return 4 } if RAX <= r && r <= R15 { return 8 } return 0 } func isSegment(p Prefix) bool { switch p { case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS: return true } return false } // The Op definitions and string list are in tables.go. var prefixNames = map[Prefix]string{ PrefixCS: "CS", PrefixDS: "DS", PrefixES: "ES", PrefixFS: "FS", PrefixGS: "GS", PrefixSS: "SS", PrefixLOCK: "LOCK", PrefixREP: "REP", PrefixREPN: "REPN", PrefixAddrSize: "ADDRSIZE", PrefixDataSize: "DATASIZE", PrefixAddr16: "ADDR16", PrefixData16: "DATA16", PrefixAddr32: "ADDR32", PrefixData32: "DATA32", PrefixBND: "BND", PrefixXACQUIRE: "XACQUIRE", PrefixXRELEASE: "XRELEASE", PrefixREX: "REX", PrefixPT: "PT", PrefixPN: "PN", } var regNames = [...]string{ AL: "AL", CL: "CL", BL: "BL", DL: "DL", AH: "AH", CH: "CH", BH: "BH", DH: "DH", SPB: "SPB", BPB: "BPB", SIB: "SIB", DIB: "DIB", R8B: "R8B", R9B: "R9B", R10B: "R10B", R11B: "R11B", R12B: "R12B", R13B: "R13B", R14B: "R14B", R15B: "R15B", AX: "AX", CX: "CX", BX: "BX", DX: "DX", SP: "SP", BP: "BP", SI: "SI", DI: "DI", R8W: "R8W", R9W: "R9W", R10W: "R10W", R11W: "R11W", R12W: "R12W", R13W: "R13W", R14W: "R14W", R15W: "R15W", EAX: "EAX", ECX: "ECX", EDX: "EDX", EBX: "EBX", ESP: "ESP", EBP: "EBP", ESI: "ESI", EDI: "EDI", R8L: "R8L", R9L: "R9L", R10L: "R10L", R11L: "R11L", R12L: "R12L", R13L: "R13L", R14L: "R14L", R15L: "R15L", RAX: "RAX", RCX: "RCX", RDX: "RDX", RBX: "RBX", RSP: "RSP", RBP: "RBP", RSI: "RSI", RDI: "RDI", R8: "R8", R9: "R9", R10: "R10", R11: "R11", R12: "R12", R13: "R13", R14: "R14", R15: "R15", IP: "IP", EIP: "EIP", RIP: "RIP", F0: "F0", F1: "F1", F2: "F2", F3: "F3", F4: "F4", F5: "F5", F6: "F6", F7: "F7", M0: "M0", M1: "M1", M2: "M2", M3: "M3", M4: "M4", M5: "M5", M6: "M6", M7: "M7", X0: "X0", X1: "X1", X2: "X2", X3: "X3", X4: "X4", X5: "X5", X6: "X6", X7: "X7", X8: "X8", X9: "X9", X10: "X10", X11: "X11", X12: "X12", X13: "X13", X14: "X14", X15: "X15", CS: "CS", SS: "SS", DS: "DS", ES: "ES", FS: "FS", GS: "GS", GDTR: "GDTR", IDTR: "IDTR", LDTR: "LDTR", MSW: "MSW", TASK: "TASK", CR0: "CR0", CR1: "CR1", CR2: "CR2", CR3: "CR3", CR4: "CR4", CR5: "CR5", CR6: "CR6", CR7: "CR7", CR8: "CR8", CR9: "CR9", CR10: "CR10", CR11: "CR11", CR12: "CR12", CR13: "CR13", CR14: "CR14", CR15: "CR15", DR0: "DR0", DR1: "DR1", DR2: "DR2", DR3: "DR3", DR4: "DR4", DR5: "DR5", DR6: "DR6", DR7: "DR7", DR8: "DR8", DR9: "DR9", DR10: "DR10", DR11: "DR11", DR12: "DR12", DR13: "DR13", DR14: "DR14", DR15: "DR15", TR0: "TR0", TR1: "TR1", TR2: "TR2", TR3: "TR3", TR4: "TR4", TR5: "TR5", TR6: "TR6", TR7: "TR7", }