1
0
mirror of https://github.com/weiju/amiga-stuff synced 2025-11-22 18:14:44 +00:00
Files
amiga-stuff/utils/hunktools/disassemble.py
Wei-ju Wu 23379213dd rearranged repository structure for tools
fdfool and hunktools now under utils
2016-01-27 12:58:24 -08:00

442 lines
15 KiB
Python

import struct
from collections import deque
# Lookup tables used by the decoder. All keys are bit strings sliced out of
# the 16-bit opcode word rendered as a string of '0'/'1' characters.

# 3-bit effective-address mode field -> addressing mode notation.
ADDR_MODES = {
    '000': 'Dn',
    '001': 'An',
    '010': '(An)',
    '011': '(An)+',
    '100': '-(An)',
    '101': '(d16,An)',
    '110': '(d8,An,Xn)',
    '111': 'EXT',  # -> ADDR_MODES_EXT
}

# When the mode field selects 'EXT', the 3-bit register field is looked up
# here instead of naming a register.
ADDR_MODES_EXT = {
    '000': '(xxx).W',
    '001': '(xxx).L',
    '100': '#<data>',
    '010': '(d16,PC)',
    '011': '(d8,PC,Xn)',
}

# Top four bits of the opcode word -> coarse instruction category.
OPCODE_CATEGORIES = {
    '0000': 'bitops_movep_imm',
    '0001': 'move.b',
    '0010': 'move.l',
    '0011': 'move.w',
    '0100': 'misc',
    '0101': 'addq_subq',
    '1001': 'sub_subx',
    '1011': 'cmp_eor',
    '0110': 'bcc_bsr_bra',
    '0111': 'moveq',
    '1101': 'add_addx',
    '1110': 'shift_rotate',
}

# 3-bit opmode field -> (size suffix, operand direction).
OPMODES = {
    '000': ('b', 'ea,dn->dn'),
    '001': ('w', 'ea,dn->dn'),
    '010': ('l', 'ea,dn->dn'),
    '100': ('b', 'dn,ea->ea'),
    '101': ('w', 'dn,ea->ea'),
    '110': ('l', 'dn,ea->ea'),
}

# Size suffixes, indexed by a 2-bit size field.
SIZES = list('bwl')

# Condition mnemonics, indexed by a 4-bit condition field.
CONDITION_CODES = 't f hi ls cc cs ne eq vc vs pl mi ge lt gt le'.split()
######################################################################
#### Operand classes / Addressing modes
######################################################################
class IntConstant:
    """Immediate integer operand, rendered as '#' plus the value in hex."""

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        # at least two hex digits, zero padded
        return '#{:02x}'.format(self.value)
class DataRegister:
    """Data register direct operand, rendered as 'd<regnum>'."""

    def __init__(self, regnum):
        self.regnum = regnum

    def __repr__(self):
        return 'd{:d}'.format(self.regnum)
class AddressRegister:
    """Address register direct operand, rendered as 'a<regnum>'."""

    def __init__(self, regnum):
        self.regnum = regnum

    def __repr__(self):
        return 'a{:d}'.format(self.regnum)
class AddressRegisterIndirect:
    """Address register indirect operand, rendered as '(a<regnum>)'."""

    def __init__(self, regnum):
        self.regnum = regnum

    def __repr__(self):
        return '(a{:d})'.format(self.regnum)
class AddressRegisterIndirectPost:
    """Address register indirect with postincrement, rendered as '(a<regnum>)+'."""

    def __init__(self, regnum):
        self.regnum = regnum

    def __repr__(self):
        return '(a{:d})+'.format(self.regnum)
class AddressRegisterIndirectPre:
    """Address register indirect with predecrement, rendered as '-(a<regnum>)'."""

    def __init__(self, regnum):
        self.regnum = regnum

    def __repr__(self):
        return '-(a{:d})'.format(self.regnum)
class AddressRegisterIndirectDisplacement:
    """Address register indirect with displacement.

    Rendered as '<displacement in hex>(a<regnum>)'.
    """

    def __init__(self, regnum, displacement):
        self.regnum = regnum
        self.displacement = displacement

    def __repr__(self):
        # displacement is shown in hex, padded to at least two characters
        return '{:02x}(a{:d})'.format(self.displacement, self.regnum)
class AddressRegisterIndirectDisplacementIndex:
    """Address register indirect with displacement and index.

    Rendered as '(<displacement>,a<regnum>,<index>)', with displacement and
    index shown in hex.
    """

    def __init__(self, regnum, displacement, index):
        self.regnum = regnum
        self.displacement = displacement
        # was `iindex`, an undefined name that made every construction fail
        self.index = index

    def __repr__(self):
        return '(%02x,a%d,%02x)' % (self.displacement, self.regnum, self.index)
######################################################################
#### Opcode classes
######################################################################
class Opcode(object):
    """Base class for a decoded instruction.

    name is the mnemonic and size the instruction length as reported by the
    decoder. The predicate methods default to "not a branch"; subclasses
    override them where appropriate.
    """

    def __init__(self, name, size):
        self.name = name
        self.size = size

    def is_branch(self):
        return False

    def is_absolute_branch(self):
        return False

    def is_local_branch(self):
        return False

    def is_return(self):
        # only 'rts' is treated as a return
        return 'rts' == self.name

    def __repr__(self):
        return str(self.name)
class Operation2(Opcode):
    """A two operand operation, rendered as '<name>\\t<src>,<dest>'."""

    def __init__(self, name, size, src, dest):
        super(Operation2, self).__init__(name, size)
        self.src = src
        self.dest = dest

    def __repr__(self):
        operands = "%s,%s" % (self.src, self.dest)
        return "%s\t%s" % (self.name, operands)
class Operation1(Opcode):
    """A single operand operation, rendered as '<name>\\t<dest>'."""

    def __init__(self, name, size, dest):
        super(Operation1, self).__init__(name, size)
        self.dest = dest

    def __repr__(self):
        return "\t".join(("%s" % self.name, "%s" % self.dest))
class Jump(Opcode):
    """A control transfer instruction (branch or jump).

    name: mnemonic, e.g. 'bra', 'bsr', 'beq', 'jsr', 'jmp'
    pc: offset of the instruction's opcode word within the code block
    size: total instruction length in bytes (opcode word plus extensions)
    displacement: for branches the signed integer displacement; for jumps
        the decoded effective-address operand
    """

    def __init__(self, name, pc, size, displacement):
        Opcode.__init__(self, name, size)
        self.displacement = displacement
        self.pc = pc

    def is_branch(self):
        return True

    def is_absolute_branch(self):
        """True for unconditional transfers that never fall through.

        'jmp' is included alongside 'bra': neither returns to the
        following instruction, so the bytes after it cannot be assumed to
        be code.
        """
        return self.name in ('bra', 'jmp')

    def is_local_branch(self):
        """a very simplified model assumption of a local branch for now, which
        excludes jumps. An improved version will also look at the relocation
        addresses, since a relocation target is likely a local address"""
        return self.name.startswith('b')

    def __repr__(self):
        if not self.is_local_branch():
            # jumps carry an operand rather than a numeric displacement
            return "%s\t%s" % (self.name, self.displacement)
        # Branch displacements are relative to the address of the opcode
        # word plus two, regardless of how many extension words hold the
        # displacement. Using pc + size was only correct for short branches.
        target = self.pc + 2 + self.displacement
        if self.size == 2:
            return "%s.s\t%x" % (self.name, target)
        return "%s\t%x" % (self.name, target)
def is_move(category):
    """Return True if category is one of the three move categories."""
    move_categories = {'move.b', 'move.w', 'move.l'}
    return category in move_categories
def next_word(size, data, data_offset):
    """Read one big-endian value from data at data_offset.

    size selects what is read:
      'L' - 4 bytes, interpreted as a signed 32-bit integer
      'W' - 2 bytes, interpreted as a signed 16-bit integer
      'B' - 2 bytes, of which only the low 8 bits are kept (unsigned)

    Returns a tuple (value, added) where added is the number of bytes
    consumed (4 for 'L', 2 otherwise).

    Raises ValueError (an Exception subclass) for any other size, and
    struct.error if data does not hold enough bytes at data_offset.
    """
    if size == 'L':
        value = struct.unpack('>i', data[data_offset:data_offset + 4])[0]
        added = 4
    elif size == 'W':
        value = struct.unpack('>h', data[data_offset:data_offset + 2])[0]
        added = 2
    elif size == 'B':
        # a byte value still occupies a full word; keep only its low byte
        value = struct.unpack('>h', data[data_offset:data_offset + 2])[0] & 0xff
        added = 2
    else:
        raise ValueError('unsupported size: %s' % (size,))
    return (value, added)
def operand(size, mode_bits, reg_bits, data, offset, skip=0):
    """Decode one effective-address operand.

    size: operand size letter ('b', 'w' or 'l', any case); only used when
        the operand is immediate data
    mode_bits: 3-character bit string, key into ADDR_MODES
    reg_bits: 3-character bit string, register number or key into
        ADDR_MODES_EXT when the mode is 'EXT'
    data: the code bytes
    offset: offset of the instruction's opcode word in data
    skip: number of extension bytes that precede this operand's own
        extension words (after the opcode word)

    Returns (result, added): the decoded operand (an operand object, or a
    string for absolute and PC-relative modes) and the number of extension
    bytes consumed.

    Raises Exception for addressing modes that are not supported yet and
    KeyError for bit patterns missing from the lookup tables.
    """
    added = 0
    mode = ADDR_MODES[mode_bits]
    size = size.upper()
    # extension words start after the 16-bit opcode word and any bytes
    # already claimed by another operand
    ext_offset = offset + 2 + skip

    if mode == 'EXT':
        mode = ADDR_MODES_EXT[reg_bits]
        if mode == '#<data>':
            imm_value, added = next_word(size, data, ext_offset)
            result = IntConstant(imm_value)
        elif mode in {'(xxx).L', '(xxx).W'}:  # absolute
            width = mode[-1]
            addr, added = next_word(width, data, ext_offset)
            # next_word reads signed values; show the address as its
            # unsigned bit pattern instead of a negative hex number
            addr &= 0xffffffff if width == 'L' else 0xffff
            result = "%02x.%s" % (addr, width)
        elif mode == '(d16,PC)':
            disp16, added = next_word('W', data, ext_offset)
            result = "%02x(PC)" % disp16
        else:
            raise Exception("unsupported ext mode: '%s'" % mode)
        return result, added

    regnum = int(reg_bits, 2)
    if mode == '(d16,An)':
        disp16, added = next_word('W', data, ext_offset)
        result = AddressRegisterIndirectDisplacement(regnum, disp16)
    elif mode == 'An':
        result = AddressRegister(regnum)
    elif mode == '(An)':
        result = AddressRegisterIndirect(regnum)
    elif mode == '(An)+':
        result = AddressRegisterIndirectPost(regnum)
    elif mode == '-(An)':
        result = AddressRegisterIndirectPre(regnum)
    elif mode == 'Dn':
        result = DataRegister(regnum)
    else:
        # includes '(d8,An,Xn)', which is not decoded yet
        raise Exception("unsupported mode: '%s'" % mode)
    return result, added
def disassemble_move(bits, data, offset):
    """Decode a move.b / move.w / move.l instruction.

    bits: the opcode word as a 16-character bit string
    data: the code bytes
    offset: offset of the opcode word in data

    Returns an Operation2 whose size is the full instruction length in
    bytes (opcode word plus all extension words).
    """
    category = OPCODE_CATEGORIES[bits[0:4]]
    size = category[-1]  # 'b', 'w' or 'l' taken from the category name
    # src: mode|reg. The source extension words directly follow the opcode
    # word, so the source is decoded first.
    src_op, src_added = operand(size, bits[10:13], bits[13:16], data, offset)
    # dst: reg|mode. The destination extension words come after the source
    # ones; previously both operands were read from the same position.
    dst_op, dst_added = operand(size, bits[7:10], bits[4:7], data, offset,
                                skip=src_added)
    return Operation2(category, 2 + src_added + dst_added, src_op, dst_op)
def disassemble_add_sub(name, bits, data, offset):
    """Decode an add/sub style instruction.

    name: mnemonic stem, e.g. 'add' or 'sub'
    bits: the opcode word as a 16-character bit string
    data: the code bytes
    offset: offset of the opcode word in data

    Returns an Operation2 named '<name>.<size>', or '<name>a.<size>' when
    the opmode selects the address register form.
    """
    regnum = int(bits[4:7], 2)
    opmode = bits[7:10]

    # opmodes 011 (word) and 111 (long) are the address register forms
    # (adda/suba): <ea>,An. They are not part of OPMODES.
    if opmode in ('011', '111'):
        size = 'w' if opmode == '011' else 'l'
        ea, added = operand(size, bits[10:13], bits[13:16], data, offset)
        return Operation2('%sa.%s' % (name, size), added + 2, ea,
                          AddressRegister(regnum))

    size, operation = OPMODES[opmode]
    ea, added = operand(size, bits[10:13], bits[13:16], data, offset)
    # use the same operand class as the rest of the decoder so registers
    # are rendered consistently
    reg = DataRegister(regnum)
    if operation == 'ea,dn->dn':
        src, dst = ea, reg
    elif operation == 'dn,ea->ea':
        src, dst = reg, ea
    else:
        raise Exception('Unknown operation for %s' % name)
    return Operation2('%s.%s' % (name, size), added + 2, src, dst)
def disassemble_misc(bits, data, offset):
    """Decode an instruction from the 'misc' category (opcode prefix 0100).

    Recognizes rts, illegal, lea, jsr, jmp and tst.

    bits: the opcode word as a 16-character bit string
    data: the code bytes
    offset: offset of the opcode word in data

    Returns the decoded instruction object; raises Exception for anything
    that is not recognized.
    """
    if bits == '0100111001110101':  # rts
        return Opcode('rts', 2)
    if bits == '0100101011111100':  # illegal
        return Opcode('illegal', 2)
    if bits[7:10] == '111':  # lea
        regnum = int(bits[4:7], 2)
        ea, added = operand('l', bits[10:13], bits[13:16], data, offset)
        return Operation2('lea', added + 2, ea, AddressRegister(regnum))
    if bits.startswith('0100111010'):  # jsr
        ea, added = operand('l', bits[10:13], bits[13:16], data, offset)
        return Jump('jsr', offset, added + 2, ea)
    if bits.startswith('0100111011'):  # jmp
        ea, added = operand('l', bits[10:13], bits[13:16], data, offset)
        # the pc argument was missing here, which raised a TypeError
        return Jump('jmp', offset, added + 2, ea)
    if bits.startswith('01001010'):  # tst.x
        size = SIZES[int(bits[8:10], 2)]
        # decode the operand with the instruction's own size
        ea, added = operand(size, bits[10:13], bits[13:16], data, offset)
        return Operation1('tst.%s' % size, added + 2, ea)

    print("unrecognized misc: %s" % bits)
    raise Exception('TODO Misc')
def signed8(value):
    """Reinterpret an unsigned 8-bit value (0..255) as a signed one."""
    if value > 127:
        return value - 256
    return value
def branch_displacement(bits, data, offset):
    """Read the displacement of a branch instruction.

    The low 8 bits of the opcode word hold a signed displacement. The
    values 0 and -1 are escapes: 0 means a 16-bit displacement follows,
    -1 means a 32-bit displacement follows.

    Returns (disp, added), added being the number of extension bytes read.
    """
    short_disp = signed8(int(bits[8:16], 2))
    if short_disp not in (0, -1):
        return short_disp, 0
    width = 'W' if short_disp == 0 else 'L'
    return next_word(width, data, offset + 2)
def _disassemble(data, offset):
    """Decode the single instruction that starts at offset in data.

    data: the code bytes
    offset: offset of the instruction's opcode word

    Returns the decoded instruction object. Raises Exception for
    instructions that are not supported yet.
    """
    bits = "{0:016b}".format(struct.unpack(">H", data[offset:offset+2])[0])
    # first step categorize by looking at bits 15-12; opcodes without a
    # table entry end up in the "unknown instruction" branch below
    category = OPCODE_CATEGORIES.get(bits[0:4])

    if is_move(category):
        instr = disassemble_move(bits, data, offset)
    elif category in {'add_addx', 'sub_subx'}:
        # addx/subx: bit 8 set, size field other than 11 and bits 5-4
        # clear. The elements of bits are characters, so they have to be
        # compared against '1', not the integer 1.
        if bits[7] == '1' and bits[8:10] != '11' and bits[10:12] == '00':
            raise Exception('addx/subx not supported yet')
        instr = disassemble_add_sub(category[0:3], bits, data, offset)
    elif category == 'misc':
        instr = disassemble_misc(bits, data, offset)
    elif category == 'moveq':
        regnum = int(bits[4:7], 2)
        value = signed8(int(bits[8:16], 2))
        instr = Operation2('moveq', 2, IntConstant(value), DataRegister(regnum))
    elif category == 'bcc_bsr_bra':
        cond_index = int(bits[4:8], 2)
        if cond_index == 0:
            name = 'bra'
        elif cond_index == 1:
            name = 'bsr'
        else:
            name = 'b%s' % CONDITION_CODES[cond_index]
        disp, added = branch_displacement(bits, data, offset)
        instr = Jump(name, offset, added + 2, disp)
    elif category == 'addq_subq':
        # size field 11 selects other instructions, not addq/subq
        if bits[7] == '0' and bits[8:10] != '11':  # addq
            size = SIZES[int(bits[8:10], 2)]
            ea, added = operand(size, bits[10:13], bits[13:16], data, offset)
            # the 3-bit data field encodes 1..8, with 000 standing for 8
            value = int(bits[4:7], 2) or 8
            instr = Operation2('addq.' + size, added + 2, IntConstant(value), ea)
        else:
            raise Exception('TODO addq_subq etc')
    elif category == 'bitops_movep_imm':
        if bits[0:10] == '0000100000':  # btst #<bitnum>,<ea>
            # the bit number word comes first, the ea extension after it
            ea, added1 = operand('l', bits[10:13], bits[13:16], data, offset, skip=2)
            bitnum, added2 = next_word('W', data, offset + 2)
            instr = Operation2('btst', added1 + added2 + 2, IntConstant(bitnum & 0xff), ea)
        elif bits[0:8] == '00001100':  # cmpi
            size = SIZES[int(bits[8:10], 2)]
            immdata, added1 = next_word(size.upper(), data, offset + 2)
            ea, added2 = operand(size, bits[10:13], bits[13:16], data, offset, skip=added1)
            instr = Operation2('cmpi.' + size, added1 + added2 + 2, IntConstant(immdata), ea)
        else:
            detail = bits[8:11]
            print("bits at offset: %d -> %s" % (offset, bits))
            raise Exception('TODO: bitops, detail: ' + detail)
    elif category == 'cmp_eor':
        regnum = int(bits[4:7], 2)
        size, direction = OPMODES[bits[7:10]]
        if direction == 'ea,dn->dn':
            ea, added = operand(size, bits[10:13], bits[13:16], data, offset)
            instr = Operation2('cmp.' + size, added + 2, ea, DataRegister(regnum))
        else:
            raise Exception('TODO: cmp_eor')
    else:
        print("\nUnknown instruction\nCategory: ", category, " Bits: ", bits)
        raise Exception('TODO')
    return instr
def print_instruction(address, instr):
    """Print one listing line: '$' + 8-digit hex address, ':', tab, instruction."""
    line = "${:08x}:\t{}".format(address, instr)
    print(line)
def disassemble(code):
    """Disassemble a chunk of code and print the resulting listing.

    Disassembling works on these assumptions:
    the first address in the block contains a valid instruction; from here
      a. branches: add the branch target to the list of continue points
      b. if the instruction is an absolute jump/branch, we can't safely
         assume the code after the instruction is valid -> continue at
         branch target
      c. conditional branch -> add the address after the instruction as a
         valid decoding location
      d. rts: we can't assume the code after this instruction is valid

    In order to achieve an ordered sequence of instructions, we store the
    disassembled instructions and their addresses in a list and sort them
    in ascending order after completion.

    code: the code bytes; an empty block produces no output.
    """
    code_size = len(code)
    reachable = deque()
    seen = set()
    result = []

    def enqueue(dest):
        # Offsets are marked as seen when they are queued, not when they
        # are decoded, so a location reached both by fall-through and by a
        # branch is only decoded and printed once. Targets outside the
        # block are ignored because there is nothing to decode there.
        if 0 <= dest < code_size and dest not in seen:
            seen.add(dest)
            reachable.append(dest)

    enqueue(0)
    while reachable:
        offset = reachable.popleft()
        instr = _disassemble(code, offset)
        result.append((offset, instr))

        if instr.is_return():
            continue  # we can't assume any valid code to come after a return

        # enqueue the address after the instruction
        if not instr.is_absolute_branch():
            enqueue(offset + instr.size)

        # following jumps and branches is non-trivial; the problem is that we
        # need to be able to tell local from global branches. For now, only
        # branch instructions are recognized as local branches.
        # TODO: jumps can be local as well, if the destination is to a
        # relocatable address, we need to include that information, too
        if instr.is_local_branch():
            # note that the branch target is computed based on the address
            # after the 16 bit opcode, ignoring additional extension words
            # in the displacement
            enqueue(offset + 2 + instr.displacement)

    result.sort(key=lambda x: x[0])
    for addr, instr in result:
        print_instruction(addr, instr)