Added external doynamite68k

2026-01-11 16:33:50 +00:00 · 2016-03-21 20:36:02 +11:00
parent a97fef89b0
commit 701f0a31d7
7 changed files with 1414 additions and 3 deletions
--- a/tools/external/doynamite68k/Makefile
+++ b/tools/external/doynamite68k/Makefile
@ -0,0 +1,12 @@
+PROGRAM=./out/lz
+OBJS=out/lz.o
+
+include ../../../shared/tools.mk
+
+test: 
+	@echo "______  ___   _____ _____ ___________  "
+	@echo "| ___ \/ _ \ /  ___/  ___|  ___|  _  \ "
+	@echo "| |_/ / /_\ \\\\\ \`--.\ \`--.| |__ | | | | "
+	@echo "|  __/|  _  | \`--. \\\`--. \  __|| | | | "
+	@echo "| |   | | | |/\__/ /\__/ / |___| |/ /  "
+	@echo "\_|   \_| |_/\____/\____/\____/|___/   "
--- a/tools/external/doynamite68k/depacker_doynax.asm
+++ b/tools/external/doynamite68k/depacker_doynax.asm
@ -0,0 +1,193 @@
+;-------------------------------------------------------------------------------
+;Lempel-Ziv decompressor by Johan "Doynax" Forsl<73>f. Call doynamitedepack to decode the entire buffer in
+;one go.
+;
+;This is a fork of the 68000 depacker by Michael Hillebrandt (Axis/Oxyron)
+;for the 6502 Doynamite format, except with the encoding rearranged to take
+;somewhat better advantage of the 68k architecture.
+;
+;The present version is 210 bytes long including the tables and should fit
+;snugly in the 68020+ cache.
+;
+;Note that the scheme with a split input stream for odd literal bytes requires
+;a significant safety buffer for forward in-place decompression (about 3% but
+;dependent on the file to be processed)
+;
+;Parameters:
+;a0 = Input buffer to be decompressed. Must be 16-bit aligned!
+;a1 = Output buffer. Points to the end of the data at exit
+;
+;Register legend:
+;d0 = 32-bit shift register terminated by a least-significant 1 bit.
+;d1 = Run length.
+;d2 = Match offset length in bits.
+;d3 = Match offset bitmask.
+;d4 = Constant offset type mask.
+;d5 = Constant offset length bit mask.
+;a0 = Bit-stream pointer.
+;a1 = Continuous output pointer.
+;a2 = Unaligned literal pointer ahead of the bit-stream. Points to the end of
+;     the source buffer at exit.
+;a3 = Mid-point where the literal buffer meets the bit-stream at EOF
+;a4 = Match source pointer.
+;-------------------------------------------------------------------------------
+		;Ensure that the shift-register contains at least 16-bits of data by
+		;checking whether the trailing word is zero, and if so filling up with
+		;another word
+DOY_REFILL macro
+		DOY_REFILL1 doy_full\@
+		DOY_REFILL2
+doy_full\@:
+		endm
+
+DOY_REFILL1 macro
+		tst.w	d0
+		bne.s	\1
+		endm
+
+DOY_REFILL2 macro						;This swaps in the new bits ahead of the
+		move.w	(a0)+,d0				;old, but that's fine as long as the
+		swap.w	d0						;encoder is in on the scheme
+		endm
+
+		;Entry point. Wind up the decruncher
+doynaxdepack:
+		movea.l	(a0)+,a2				;Unaligned literal buffer at the end of
+		adda.l	a0,a2					;the stream
+		move.l	a2,a3
+		move.l	(a0)+,d0				;Seed the shift register
+		moveq	#@70,d4					;Masks for match offset extraction
+		moveq	#@10,d5
+		bra.s	doy_literal
+
+
+		;******** Copy a literal sequence ********
+
+doy_lcopy:								;Copy two bytes at a time, with the
+		move.b	(a0)+,(a1)+				;deferral of the length LSB helping
+		move.b	(a0)+,(a1)+				;slightly in the unrolling
+		dbf		d1,doy_lcopy
+
+		lsl.l	#2,d0					;Copy odd bytes separately in order
+		bcc.s	doy_match				;to keep the source aligned
+doy_lsingle:
+		move.b	(a2)+,(a1)+
+
+
+		;******** Process a match ********
+
+		;Start by refilling the bit-buffer
+doy_match:
+		DOY_REFILL1 doy_mprefix
+		cmp.l	a0,a3					;Take the opportunity to test for the
+		bls.s	doy_return				;end of the stream while refilling
+doy_mrefill:
+		DOY_REFILL2
+
+doy_mprefix:
+		;Fetch the first three bits identifying the match length, and look up
+		;the corresponding table entry
+		rol.l	#3+3,d0
+		move.w	d0,d1
+		and.w	d4,d1
+		eor.w	d1,d0
+		movem.w	doy_table(pc,d1.w),d2/d3/a4
+
+		;Extract the offset bits and compute the relative source address from it
+		rol.l	d2,d0					;Reduced by 3 to account for 8x offset
+		and.w	d0,d3					;scaling
+		eor.w	d3,d0
+		suba.w	d3,a4
+		adda.l	a1,a4
+
+		;Decode the match length
+		DOY_REFILL
+		and.w	d5,d1					;Check the initial length bit from the
+		beq.s	doy_mcopy				;type triple
+
+		moveq	#1,d1					;This loops peeks at the next flag
+		tst.l	d0						;through the sign bit bit while keeping
+		bpl.s	doy_mendlen2			;the LSB in carry
+		lsl.l	#2,d0
+		bpl.s	doy_mendlen1
+doy_mgetlen:
+		addx.b	d1,d1
+		lsl.l	#2,d0
+		bmi.s	doy_mgetlen
+doy_mendlen1:
+		addx.b	d1,d1
+doy_mendlen2:
+
+		;Copy the match data a word at a time. Note that the minimum length is
+		;two bytes
+		lsl.l	#2,d0					;The trailing length payload bit is
+		bcc.s	doy_mhalf				;stored out-of-order
+doy_mcopy:
+		move.b	(a4)+,(a1)+
+doy_mhalf:
+		move.b	(a4)+,(a1)+
+		dbf		d1,doy_mcopy
+
+		;Fetch a bit flag to see whether what follows is a literal run or
+		;another match
+		add.l	d0,d0
+		bcc.s	doy_match
+
+
+		;******** Process a run of literal bytes ********
+
+		DOY_REFILL						;Replenish the shift-register
+doy_literal:
+		;Extract delta-coded run length in the same swizzled format as the
+		;matches above
+		moveq	#0,d1
+		add.l	d0,d0
+		bcc.s	doy_lsingle				;Single out the one-byte case
+		bpl.s	doy_lendlen
+doy_lgetlen:
+		addx.b	d1,d1
+		lsl.l	#2,d0
+		bmi.s	doy_lgetlen
+doy_lendlen:
+		addx.b	d1,d1
+
+		;Branch off to the main copying loop unless the run length is 256 bytes
+		;or greater, in which case the sixteen guaranteed bits in the buffer
+		;may have run out.
+		;In the latter case simply give up and stuff the payload bits back onto
+		;the stream before fetching a literal 16-bit run length instead
+doy_lcopy_near:
+		dbvs	d1,doy_lcopy
+
+		add.l	d0,d0
+		eor.w	d1,d0		
+		ror.l	#7+1,d0					;Note that the constant MSB acts as a
+		move.w	(a0)+,d1				;substitute for the unfetched stop bit
+		bra.s	doy_lcopy_near
+
+
+		;******** Offset coding tables ********
+
+DOY_OFFSET macro
+		dc.w	(\1)-3					;Bit count, reduced by three
+		dc.w	(1<<(\1))-1				;Bit mask
+		dc.w	-(\2)					;Base offset
+		endm
+
+doy_table:
+		DOY_OFFSET 3,1					;Short A
+doy_return:
+		rts
+		DOY_OFFSET 4,1					;Long A
+		dc.w	0						;(Empty hole)
+		DOY_OFFSET 6,1+8				;Short B
+		dc.w	0						;(Empty hole)
+		DOY_OFFSET 7,1+16				;Long B
+		dc.w	0						;(Empty hole)
+		DOY_OFFSET 8,1+8+64				;Short C
+		dc.w	0						;(Empty hole)
+		DOY_OFFSET 10,1+16+128			;Long C
+		dc.w	0						;(Empty hole)
+		DOY_OFFSET 10,1+8+64+256		;Short D
+		dc.w	0						;(Empty hole)
+		DOY_OFFSET 13,1+16+128+1024		;Long D
--- a/tools/external/doynamite68k/lz.c
+++ b/tools/external/doynamite68k/lz.c
@ -0,0 +1,978 @@
+/*
+Lempel-Ziv compressor by Johan "Doynax" Forsl<73>f.
+
+This is based on the 6502 Doynamite format, except with the encoding rearranged to take
+somewhat better advantage of the 68k architecture.
+
+Note that the scheme with a split input stream for odd literal bytes requires
+a significant safety buffer for forward in-place decompression (about 3% but
+dependent on the file to be processed)
+*/
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+// Compile-time configuration
+#define DEFAULT_LENGTHS     "3/6/8/10:4/7/10/13"
+// Decruncher limitations
+#define MATCH_LIMIT         0x100
+#define LITERAL_LIMIT       0x1FFFF
+#define OFFSET_LENGTH_MIN   3
+#define OFFSET_LENGTH_MAX   13
+
+// Some definitions for compiler independence
+#if __STDC_VERSION__ >= 199901L
+#	include <stdbool.h>
+#else
+#	ifdef _MSC_VER
+#		define inline __forceinline
+#	elif defined(__GNUC__)
+#		define inline __inline
+#	else
+#		define inline
+#	endif
+typedef enum { false, true } bool;
+#endif
+
+#ifdef _WIN32
+	// off64_t used instead of _off64_t in file positioning functions.
+	// Yank them altogether for now
+#	if defined(__MINGW32__) && defined(__STRICT_ANSI__)
+#		define __NO_MINGW_LFS
+#	endif
+#	include <malloc.h>
+#	include <io.h>
+#	ifndef alloca
+#		define alloca _alloca
+#	endif
+#else
+#	include <sys/stat.h>
+#	include <alloca.h>
+#endif
+#if defined(__STRICT_ANSI__) && (defined(__MINGW32__) || defined(__CYGWIN__))
+#	ifndef _fileno
+#		define _fileno(p) ((p)->_file)
+#	endif
+#endif
+
+#	define _fileno(p) ((p)->_file)
+
+#if CHAR_BIT != 8
+	// No doubt a 36-bit sign-magnitude machine with signaling integer overflow,
+	// non-extending right shifts, decimal floating point and 6 significant
+	// characters in external identifiers
+#	error
+#endif
+
+#undef min
+
+// The main crunching structure
+typedef struct {
+	signed match_length;
+	unsigned match_offset;
+	union {
+		signed hash_link;
+		unsigned cumulative_cost;
+	};
+} lz_info;
+
+typedef struct {
+	unsigned short *ptr;
+	unsigned mask;
+} lz_bittag_t;
+
+typedef struct {
+	unsigned char *src_data;
+	unsigned src_begin;
+	unsigned src_end;
+
+	lz_info *info;
+
+	unsigned short *dst_wordbuf;
+	unsigned short *dst_wordptr;
+	unsigned short *dst_wordend;
+	unsigned char *dst_litbuf;
+	unsigned char *dst_litptr;
+	unsigned char *dst_litend;
+	lz_bittag_t dst_bitbuf[32];
+
+	signed hash_table[0x100];
+
+	// Some informational counters
+	struct {
+		unsigned output_size;
+		unsigned output_bits;
+		unsigned short_freq[4];
+		unsigned long_freq[4];
+		unsigned literal_bytes;
+		unsigned literal_runs;
+		unsigned match_bytes;
+		unsigned match_count;
+		unsigned offset_distance;
+	} stats;
+} lz_context;
+
+// A bit of global configuration data
+typedef struct  {
+	unsigned bits;
+	unsigned base;
+	signed limit;
+} offset_length_t;
+
+static offset_length_t cfg_short_offset[4];
+static offset_length_t cfg_long_offset[4];
+#define cfg_short_limit (cfg_short_offset[3].limit)
+#define cfg_long_limit (cfg_long_offset[3].limit)
+
+
+/******************************************************************************
+ * Various utility functions and bithacks
+ ******************************************************************************/
+#define countof(n) (sizeof(n) / sizeof *(n))
+
+static inline unsigned _log2(unsigned value) {
+#	ifdef __GNUC__
+	enum { WORD_BITS = sizeof(unsigned) * CHAR_BIT };
+	return (WORD_BITS - 1) ^ __builtin_clz(value);
+#	else
+	signed bits = -1;
+	do
+		++bits;
+	while(value >>= 1);
+	return bits;
+#	endif
+}
+
+static inline unsigned min(unsigned a, unsigned b) {
+	return (a < b) ? a : b;
+}
+
+#ifdef _MSC_VER
+__declspec(noreturn)
+#elif defined(__GNUC__)
+__attribute__((noreturn))
+__attribute__((format(printf, 1, 2)))
+#endif
+static void
+#ifdef _MSC_VER
+__cdecl
+#endif
+fatal(const char *format, ...) {
+	va_list args;
+
+	va_start(args, format);
+	fputs("error: ", stderr);
+	vfprintf(stderr, format, args);
+	fputc('\n', stderr);
+	va_end(args);
+
+	exit(EXIT_FAILURE);
+}
+
+
+
+/******************************************************************************
+ * Manage the output stream
+ ******************************************************************************/
+static inline void output_write(lz_context *ctx, const char *name) {
+	FILE *file = fopen(name, "wb");
+	unsigned length;
+	unsigned short *ptr;
+	unsigned short swap;
+	if(!file)
+		fatal("error: cannot create '%s'", name);
+	// Write offset to the literal section
+	length = (ctx->dst_wordptr - ctx->dst_wordbuf) * 2;
+	putc(length >> 24, file);
+	putc(length >> 16, file);
+	putc(length >> 8, file);
+	putc(length >> 0, file);
+	// The seed word needs to be stored in the opposite word order
+	ptr = ctx->dst_wordbuf;
+	swap = ptr[0];
+	ptr[0] = ptr[1];
+	ptr[1] = swap;
+	// Write bit-buffer words in big-endian order
+	for(; ptr < ctx->dst_wordptr; ++ptr) {
+		putc(*ptr >> 8, file);
+		putc(*ptr >> 0, file);
+	}
+	// Finally close with the literals words
+	fwrite(ctx->dst_litbuf, ctx->dst_litptr - ctx->dst_litbuf,
+		sizeof *ctx->dst_litbuf, file);
+	ctx->stats.output_size = ftell(file);
+	fclose(file);
+}
+
+static unsigned short *output_word(lz_context *ctx) {
+	unsigned short *ptr = ctx->dst_wordptr++;
+	if(ptr == ctx->dst_wordend)
+		fatal("out of output buffer space");
+	return ptr;
+}
+
+static inline void output_flush(lz_context *ctx) {
+	if(!ctx->dst_bitbuf[16].ptr) {
+		size_t i;
+		unsigned short *ptr = output_word(ctx);
+		*ptr = 0;
+		for(i = 0; i < 16; ++i) {
+			ctx->dst_bitbuf[i] = ctx->dst_bitbuf[i + 16];
+			ctx->dst_bitbuf[i + 16].ptr = ptr;
+			ctx->dst_bitbuf[i + 16].mask = 1 << i;
+		}
+	}
+}
+
+static void output_bits_at(lz_context *ctx, unsigned data, unsigned len,
+	unsigned offset) {
+	lz_bittag_t *bit;
+	offset = 32 - offset;
+	for(bit = &ctx->dst_bitbuf[offset - len]; bit < &ctx->dst_bitbuf[offset]; ++bit) {
+		if(data & 1)
+			*bit->ptr |= bit->mask;
+		data >>= 1;
+	}
+	memmove(&ctx->dst_bitbuf[len], ctx->dst_bitbuf,
+		(offset - len) * sizeof *ctx->dst_bitbuf);
+	memset(ctx->dst_bitbuf, 0, len * sizeof *ctx->dst_bitbuf);
+}
+
+static void output_bits(lz_context *ctx, unsigned data, unsigned len) {
+	output_bits_at(ctx, data, len, 0);
+}
+
+static void output_bit(lz_context *ctx, unsigned data) {
+	output_bits(ctx, data, 1);
+}
+
+static void output_literal(lz_context *ctx, unsigned value) {
+	unsigned char *ptr = ctx->dst_litptr++;
+	if(ptr == ctx->dst_litend)
+		fatal("out of literal buffer space");
+	*ptr = value;
+}
+
+static inline unsigned output_bitsize(lz_context *ctx) {
+	const lz_bittag_t *bit;
+
+	unsigned total = ctx->dst_wordptr - ctx->dst_wordbuf;
+	total <<= 1;
+	total += ctx->dst_litptr - ctx->dst_litbuf;
+
+	// Count up the unused remaining bits at the end
+	total <<= 3;
+	for(bit = &ctx->dst_bitbuf[31]; bit->ptr; --bit)
+		--total;
+
+	ctx->stats.output_bits = total;
+	return total;
+}
+
+static inline unsigned output_init(lz_context *ctx, unsigned bits,
+	unsigned literals) {
+	unsigned seed;
+	unsigned words;
+
+	// Drop the separately stored literals
+	signed adjustment = literals * 8;
+	bits -= adjustment;
+	words = (bits + 15) >> 4;
+	// Fudge factor
+	words += 2;
+
+	// Allocate the output buffers
+	ctx->dst_wordbuf = malloc(words * sizeof *ctx->dst_wordbuf);
+	ctx->dst_wordptr = ctx->dst_wordbuf;
+	ctx->dst_wordend = &ctx->dst_wordptr[words];
+
+	ctx->dst_litbuf = malloc(literals * sizeof *ctx->dst_litbuf);
+	ctx->dst_litptr = ctx->dst_litbuf;
+	ctx->dst_litend = &ctx->dst_litend[words];
+
+	// Throw away any left-over bits required to round up the bit-stream length
+	// to full 16-bit words. Also append the '1' bit sentinel. This serves to
+	// creates a seed word
+	output_flush(ctx);
+	seed = (0 - bits) & 15;
+	seed += 16;
+	*ctx->dst_bitbuf[seed].ptr |= ctx->dst_bitbuf[seed].mask;
+	memset(ctx->dst_bitbuf, 0, sizeof *ctx->dst_bitbuf * (seed + 1));
+
+	// Return the adjusted size
+	return bits + adjustment;
+}
+
+
+/******************************************************************************
+ * Read file into memory and allocate per-byte buffers
+ ******************************************************************************/
+static void read_input(lz_context *ctx, const char *name) {
+	FILE *file;
+	signed length;
+
+	if(file = fopen(name, "rb"), !file)
+		fatal("unable to open '%s'", name);
+
+#	ifdef _WIN32
+	length = _filelength(_fileno(file));
+#	else
+	{
+		struct stat stat;
+		stat.st_size = 0;
+		fstat(_fileno(file), &stat);
+		length = stat.st_size;
+	}
+#	endif
+
+	if(length <= 0)
+		fatal("cannot determine length of '%s'", name);
+
+	{
+		// Give us a sentinel for the info structure and prevent two-byte
+		// hashing from overrunning the buffer
+		unsigned count = length + 1;
+
+		ctx->info = malloc(count *
+			(sizeof *ctx->info + sizeof *ctx->src_data));
+		ctx->src_data = (void *) &ctx->info[count];
+
+		if(!ctx->info)
+			fatal("cannot allocate memory buffer");
+
+		if(fread(ctx->src_data, length, 1, file) != 1)
+			fatal("cannot read '%s'", name);
+	}
+
+	ctx->src_begin = 0;
+	ctx->src_end = length;
+}
+
+
+/******************************************************************************
+ * Try to figure out what matches would be the most beneficial
+ ******************************************************************************/
+static inline unsigned costof_run(unsigned run) {
+	return _log2(run) * 2 + 1;
+}
+
+static inline unsigned costof_literals(unsigned address, unsigned length) {
+	unsigned cost = length * 8;
+	// Long (8-bit+) runs have a special raw encoding
+	if(length >= 256)
+		return cost + 17 /* run length */ - 7 /* push back */ + 16 /* raw length */;
+	return cost + costof_run(length);
+}
+
+static inline unsigned costof_match(const offset_length_t *class, signed offset,
+	unsigned length) {
+	unsigned cost = 3;
+
+	while(offset > class->limit)
+		++class;
+	cost += class->bits;
+
+	return cost + costof_run(length - 1);
+}
+
+static inline lz_info optimal_parsing_literal(const lz_info *info, unsigned cursor) {
+	unsigned cost;
+	lz_info result;
+
+	signed length = -info[cursor + 1].match_length;
+
+	if(length > 0)
+		cost = info[cursor + ++length].cumulative_cost;
+	else {
+		cost = info[cursor + 1].cumulative_cost;
+		length = 1;
+	}
+
+	cost += costof_literals(cursor, length);
+
+	result.match_length = -length;
+	result.cumulative_cost = cost;
+	return result;
+}
+
+static inline lz_info optimal_parsing (
+	const lz_info *info,
+	unsigned cursor,
+	signed match_offset,
+	unsigned match_length,
+	unsigned match_limit,
+	lz_info best_match
+) {
+	unsigned cost;
+
+	if(match_length == 2) {
+		if(match_offset <= cfg_short_limit) {
+			cost = costof_match(cfg_short_offset, match_offset, match_length);
+			goto try_short_match;
+		} else if(++match_length > match_limit)
+			return best_match;
+	}
+
+	do {
+		cost = costof_match(cfg_long_offset, match_offset, match_length);
+try_short_match:
+		cost += info[cursor + match_length].cumulative_cost;
+
+		if(cost < best_match.cumulative_cost) {
+			best_match.match_offset = match_offset;
+			best_match.match_length = match_length;
+			best_match.cumulative_cost = cost;
+		}
+	} while(++match_length <= match_limit);
+
+	return best_match;
+}
+
+
+
+/******************************************************************************
+ * Determine the longest match for every position of the file
+ ******************************************************************************/
+static inline signed *hashof(lz_context *ctx, unsigned a, unsigned b) {
+	static const unsigned char random[] = {
+		0x17, 0x80, 0x95, 0x4f, 0xc7, 0xd1, 0x15, 0x13,
+		0x91, 0x57, 0x0f, 0x47, 0xd0, 0x59, 0xab, 0xf0,
+		0xa7, 0xf5, 0x36, 0xc0, 0x24, 0x9c, 0xed, 0xfd,
+		0xd4, 0xf3, 0x51, 0xb4, 0x8c, 0x97, 0xa3, 0x58,
+		0xcb, 0x61, 0x78, 0xb1, 0x3e, 0x7e, 0xfb, 0x41,
+		0x39, 0xa6, 0x8e, 0x10, 0xa1, 0xba, 0x62, 0xcd,
+		0x94, 0x02, 0x0d, 0x2b, 0xdb, 0xd7, 0x44, 0x16,
+		0x29, 0x4d, 0x68, 0x0a, 0x6b, 0x6c, 0xa2, 0xf8,
+		0xc8, 0x9f, 0x25, 0xca, 0xbd, 0x4a, 0xc2, 0x35,
+		0x53, 0x1c, 0x40, 0x04, 0x76, 0x43, 0xa9, 0xbc,
+		0x46, 0xeb, 0x99, 0xe9, 0xf6, 0x5e, 0x8f, 0x8a,
+		0xf1, 0x5d, 0x21, 0x33, 0x0b, 0x82, 0xdf, 0x52,
+		0xea, 0x27, 0x22, 0x9a, 0x6f, 0xad, 0xe5, 0x83,
+		0x11, 0xbe, 0xa4, 0x85, 0x1d, 0xb3, 0x77, 0xf4,
+		0xef, 0xb7, 0xf2, 0x03, 0x64, 0x6d, 0x1b, 0xee,
+		0x72, 0x08, 0x66, 0xc6, 0xc1, 0x06, 0x56, 0x81,
+		0x55, 0x60, 0x70, 0x8d, 0x23, 0xb2, 0x65, 0x5b,
+		0xff, 0x4c, 0xb9, 0x7a, 0xd6, 0xe6, 0x19, 0x9b,
+		0xb5, 0x49, 0x7d, 0xd8, 0x45, 0x1a, 0x84, 0x32,
+		0xdd, 0xbf, 0x9e, 0x2f, 0xd2, 0xec, 0x92, 0x0e,
+		0xe8, 0x7c, 0x7f, 0x00, 0x86, 0xde, 0xb6, 0xcf,
+		0x05, 0x69, 0xd5, 0x37, 0xe4, 0x30, 0x3c, 0xe1,
+		0x4b, 0xaa, 0x3b, 0x2d, 0xda, 0x5c, 0xcc, 0x67,
+		0x20, 0xb0, 0x6a, 0x1f, 0xf9, 0x01, 0xac, 0x2e,
+		0x71, 0xf7, 0xfc, 0x3f, 0x42, 0xd3, 0xbb, 0xa8,
+		0x38, 0xce, 0x12, 0x96, 0xe2, 0x14, 0x87, 0x4e,
+		0x63, 0x07, 0xae, 0xdc, 0xa5, 0xc9, 0x0c, 0x90,
+		0xe7, 0xd9, 0x09, 0x2a, 0xc4, 0x3d, 0x5a, 0x34,
+		0x8b, 0x88, 0x98, 0x48, 0xfa, 0xc3, 0x26, 0x75,
+		0xfe, 0xa0, 0x7b, 0x50, 0x2c, 0x89, 0x18, 0x9d,
+		0x3a, 0x73, 0x6e, 0x5f, 0xc5, 0xaf, 0xb8, 0x74,
+		0x93, 0xe3, 0x79, 0x28, 0xe0, 0x1e, 0x54, 0x31
+	};
+
+	size_t bucket = random[a] ^ b;
+	return &ctx->hash_table[bucket];
+}
+
+static inline void generate_hash_table(lz_context *ctx) {
+	unsigned cursor;
+
+	const unsigned src_end = ctx->src_end;
+	const unsigned char *src_data = ctx->src_data;
+	lz_info *info = ctx->info;
+
+	for(cursor = 0; cursor < countof(ctx->hash_table); ++cursor)
+		ctx->hash_table[cursor] = INT_MIN;
+
+	for(cursor = ctx->src_begin; cursor != src_end; ++cursor) {
+		signed *hash_bucket = hashof (
+			ctx,
+			src_data[cursor + 0],
+			src_data[cursor + 1]
+		);
+
+		info[cursor].hash_link = *hash_bucket;
+		*hash_bucket = cursor;
+	}
+}
+
+static inline void find_matches(lz_context *ctx) {
+	const unsigned src_begin = ctx->src_begin;
+	const unsigned src_end = ctx->src_end;
+	const unsigned char *src_data = ctx->src_data;
+	lz_info *info = ctx->info;
+
+	unsigned offset_limit = cfg_long_limit;
+	unsigned cursor = ctx->src_end;
+
+	// Install a sentinel at the end of the array
+	info[cursor].match_offset = SHRT_MAX;
+	info[cursor].match_length = 0;
+	info[cursor].cumulative_cost = 0;
+
+	while(cursor != src_begin) {
+		unsigned match_length;
+		signed cursor_limit;
+		unsigned length_limit;
+		signed *hash_bucket;
+		signed hash_link;
+		lz_info best_match;
+
+		--cursor;
+
+		match_length = 1;
+		cursor_limit = cursor - offset_limit;
+
+		length_limit = min(MATCH_LIMIT, src_end - cursor);
+
+		hash_bucket = hashof (
+			ctx,
+			src_data[cursor + 0],
+			src_data[cursor + 1]
+		);
+
+		assert((unsigned) *hash_bucket == cursor);
+		hash_link = info[cursor].hash_link;
+		*hash_bucket = hash_link;
+
+		best_match = optimal_parsing_literal(info, cursor);
+
+		while(hash_link >= cursor_limit) {
+			unsigned int match_limit = min(hash_link, length_limit);
+
+			if(match_length < match_limit) {
+				unsigned i = match_length + 1;
+
+				if(!memcmp(&src_data[cursor], &src_data[hash_link], i)) {
+					for(; i != match_limit; ++i) {
+						if(src_data[cursor + i] != src_data[hash_link + i])
+							break;
+					}
+
+					assert(i <= match_limit);
+
+					best_match = optimal_parsing (
+						info,
+						cursor,
+						cursor - hash_link,
+						match_length + 1,
+						i,
+						best_match
+					);
+
+					match_length = i;
+
+					if(match_length == MATCH_LIMIT)
+						break;
+				}
+			}
+
+			hash_link = info[hash_link].hash_link;
+		}
+
+		info[cursor] = best_match;
+	}
+}
+
+
+/******************************************************************************
+ * Write the generated matches and literal runs
+ ******************************************************************************/
+static inline void encode_literals (
+	lz_context *ctx,
+	unsigned cursor,
+	unsigned length
+) {
+	unsigned bit;
+	const unsigned char *data;
+
+	ctx->stats.literal_bytes += length;
+	++ctx->stats.literal_runs;
+
+	output_flush(ctx);
+
+	// Gamma coding with the final data bit juggled around to the end
+	if(length < 256) {
+		bit = _log2(length);
+		if(!bit) {
+			output_bit(ctx, 0);
+		} else {
+			output_bit(ctx, 1);
+			while(--bit) {
+				output_bit(ctx, 1);
+				output_bit(ctx, length >> bit);
+			}
+			output_bit(ctx, 0);
+			output_bit(ctx, length);
+		}
+	// A special raw mode is used for 8-bit+ literals
+	} else {
+		// Just bail if it doesn't even fit in 128 kB
+		if(length > LITERAL_LIMIT)
+			fatal("incompressible file. %u byte literal block generated", length);
+		output_bit(ctx, 1);
+		// Leave the odd data bits on the stream, they'll be reused later on
+		for(bit = 0; bit < 7; ++bit)
+			output_bits_at(ctx, 1, 1, bit);
+		output_bits_at(ctx, 0, 1, bit);
+		// Store a raw 16-bit length for the unrolled copying loop
+		*output_word(ctx) = length >> 1;
+		// Include the final odd bit as usual
+		output_bit(ctx, length);
+	}
+
+	// First write the bulk of the literals in 16-bit byte pairs
+	data = &ctx->src_data[cursor];
+	while(length >= 2) {
+		unsigned short *ptr = output_word(ctx);
+		*ptr = *data++ << 8;
+		*ptr |= *data++ << 0;
+		length -= 2;
+	}
+	// Write the final odd byte on a separate stream
+	if(length & 1)
+		output_literal(ctx, *data);
+}
+
+static inline void encode_match (
+	lz_context *ctx,
+	signed offset,
+	unsigned length
+) {
+	unsigned offset_prefix;
+	const offset_length_t *offset_class;
+	signed length_bit;
+
+	++ctx->stats.match_count;
+	ctx->stats.match_bytes += length;
+	ctx->stats.offset_distance += offset;
+
+	output_flush(ctx);
+
+	// Write offset prefix
+	if(length == 2) {
+		assert(offset <= cfg_short_limit);
+		offset_prefix = 0;
+		offset_class = cfg_short_offset;
+
+		while(offset > offset_class->limit) {
+			++offset_class;
+			++offset_prefix;
+		}
+
+		++ctx->stats.short_freq[offset_prefix];
+	} else {
+		assert(offset <= cfg_long_limit);
+		offset_prefix = 0;
+		offset_class = cfg_long_offset;
+
+		while(offset > offset_class->limit) {
+			++offset_class;
+			++offset_prefix;
+		}
+
+		++ctx->stats.long_freq[offset_prefix];
+	}
+
+	// Include the initial length bit in the prefix code
+	length_bit = _log2(--length);
+	offset_prefix <<= 1;
+	if(length_bit)
+		++offset_prefix;
+	output_bits(ctx, offset_prefix, 3);
+
+	// Write offset payload
+	offset -= offset_class->base;
+	output_bits(ctx, offset, offset_class->bits);
+
+	output_flush(ctx);
+
+	// Write out the remaining length bits
+	while(length_bit--) {
+		output_bit(ctx, !!length_bit);
+		output_bit(ctx, length >> length_bit);
+	}
+}
+
+static inline void write_output(lz_context *ctx, bool show_trace) {
+	unsigned cursor;
+	unsigned literals;
+
+	bool implicit = true;
+	unsigned src_end = ctx->src_end;
+	lz_info *info = ctx->info;
+	signed length;
+
+	unsigned expected = info[ctx->src_begin].cumulative_cost;
+
+	// Sum up all of the unaligned literal bytes in a pre-pass
+	literals = 0;
+	for(cursor = ctx->src_begin; cursor < src_end; cursor += length) {
+		length = info[cursor].match_length;
+		if(length < 0) {
+			length = -length;
+			literals += length & 1;
+		}
+	}
+
+	// Allocate and seed the output buffers
+	expected = output_init(ctx, expected, literals);
+
+	// Encode all of the matches and literals 
+	for(cursor = ctx->src_begin; cursor < src_end; cursor += length) {
+		length = info[cursor].match_length;
+
+		if(length > 0) {
+			unsigned offset;
+
+			if(!implicit)
+				output_bit(ctx, 0);
+
+			offset = info[cursor].match_offset;
+			encode_match(ctx, offset, length);
+
+			if(show_trace) {
+				printf (
+					"$%04x %smatch(-%u/$%04x, %u bytes)\n",
+					cursor,
+					implicit ? "" : "explicit-",
+					offset,
+					cursor - offset,
+					length
+				);
+			}
+
+			implicit = false;
+		} else {
+			length = -length;
+
+			if(!implicit)
+				output_bit(ctx, 1);
+			encode_literals(ctx, cursor, length);
+
+			if(show_trace) {
+				printf (
+					"$%04x literal(%u bytes)\n",
+					cursor,
+					length
+				);
+			}
+
+			implicit = true;
+		}
+	}
+
+	if(!implicit)
+		output_bit(ctx, 0);
+
+	// The model must match the real length to the bit in order for the bit-stream
+	// to be aligned at the end
+	(void) output_bitsize(ctx);
+	// The sentinel ought to just have been shifted in
+	if(ctx->dst_bitbuf[16].ptr || !ctx->dst_bitbuf[17].ptr)
+		fatal("improper bit-stream alignment");
+}
+
+
+/******************************************************************************
+ * Parse out the set of offset bit lengths from a descriptor string
+ ******************************************************************************/
+static void prepare_offset_lengths(offset_length_t *table, size_t count) {
+	unsigned base;
+	unsigned limit = 0;
+	unsigned previous = 0;
+
+	do {
+		unsigned int bits = table->bits;
+
+		if(bits <= previous)
+			fatal("offset lengths must be listed in ascending order");
+		previous = bits;
+		if(bits < OFFSET_LENGTH_MIN)
+			fatal("offset lengths cannot be narrower than %u bits", OFFSET_LENGTH_MIN);
+		if(bits > OFFSET_LENGTH_MAX)
+			fatal("offset lengths cannot be wider than %u bits", OFFSET_LENGTH_MAX);
+
+		base = limit + 1;
+		limit += 1 << bits;
+		table->base = base;
+		table->limit = limit;
+		++table;
+	} while(--count);
+}
+
+static inline bool parse_offset_lengths(const char *text) {
+	if(sscanf(text, "%u/%u/%u/%u:%u/%u/%u/%u",
+		&cfg_short_offset[0].bits, &cfg_short_offset[1].bits,
+		&cfg_short_offset[2].bits, &cfg_short_offset[3].bits,
+		&cfg_long_offset[0].bits, &cfg_long_offset[1].bits,
+		&cfg_long_offset[2].bits, &cfg_long_offset[3].bits) != 8) {
+		return false;
+	}
+	prepare_offset_lengths(cfg_short_offset, 4);
+	prepare_offset_lengths(cfg_long_offset, 4);
+	return true;
+}
+
+
+/******************************************************************************
+ * Print some basic statistics about the encoding of the file
+ ******************************************************************************/
+static inline void print_statistics(const lz_context *ctx, FILE *file) {
+	unsigned input_size = ctx->src_end - ctx->src_begin;
+
+	fprintf (
+		file,
+		"input file:\t"    "%u bytes\n"
+		"output file:\t"   "%u bytes, %u bits (%.2f%% ratio)\n"
+		"short offsets:\t" "{ %u-%u: %u, %u-%u: %u, %u-%u: %u, %u-%u: %u }\n"
+		"long offsets:\t"  "{ %u-%u: %u, %u-%u: %u, %u-%u: %u, %u-%u: %u }\n"
+		"%u matches:\t"    "%u bytes, %f avg\n"
+		"%u literals:\t"   "%u bytes, %f avg\n"
+		"avg offset:\t"    "%f bytes\n",
+
+		input_size,
+		ctx->stats.output_size,
+		ctx->stats.output_bits,
+		100.0 * ctx->stats.output_size / input_size,
+
+		cfg_short_offset[0].base,
+		cfg_short_offset[0].limit,
+		ctx->stats.short_freq[0],
+		cfg_short_offset[1].base,
+		cfg_short_offset[1].limit,
+		ctx->stats.short_freq[1],
+		cfg_short_offset[2].base,
+		cfg_short_offset[2].limit,
+		ctx->stats.short_freq[2],
+		cfg_short_offset[3].base,
+		cfg_short_offset[3].limit,
+		ctx->stats.short_freq[3],
+		cfg_long_offset[0].base,
+		cfg_long_offset[0].limit,
+		ctx->stats.long_freq[0],
+		cfg_long_offset[1].base,
+		cfg_long_offset[1].limit,
+		ctx->stats.long_freq[1],
+		cfg_long_offset[2].base,
+		cfg_long_offset[2].limit,
+		ctx->stats.long_freq[2],
+		cfg_long_offset[3].base,
+		cfg_long_offset[3].limit,
+		ctx->stats.long_freq[3],
+
+		ctx->stats.match_count,
+		ctx->stats.match_bytes,
+		(double) ctx->stats.match_bytes / ctx->stats.match_count,
+
+		ctx->stats.literal_runs,
+		ctx->stats.literal_bytes,
+		(double) ctx->stats.literal_bytes / ctx->stats.literal_runs,
+
+		(double) ctx->stats.offset_distance / ctx->stats.match_count
+	);
+}
+
+
+/******************************************************************************
+ * The main function
+ ******************************************************************************/
+int
+#ifdef _MSC_VER
+__cdecl
+#endif
+main(int argc, char *argv[]) {
+	const char *input_name;
+	unsigned name_length;
+	unsigned i;
+
+	lz_context ctx;
+
+	// Parse the command line
+	const char *program_name = *argv;
+	char *output_name = NULL;
+	bool show_stats = false;
+	bool show_trace = false;
+	memset(&ctx, 0, sizeof ctx);
+	parse_offset_lengths(DEFAULT_LENGTHS);
+
+	while(++argv, --argc) {
+		if(argc >= 2 && !strcmp(*argv, "-o")) {
+			output_name = *++argv;
+			--argc;
+		} else if(argc >= 2 && !strcmp(*argv, "--offset-lengths")) {
+			if(!parse_offset_lengths(*++argv))
+				break;
+			--argc;
+		} else if(!strcmp(*argv, "--statistics")) {
+			show_stats = true;
+		} else if(!strcmp(*argv, "--trace-coding")) {
+			show_trace = true;
+		} else {
+			break;
+		}
+	}
+
+	if(argc != 1) {
+		fprintf (
+			stderr,
+			"syntax: %s\n"
+			"\t[-o output.lz]\n"
+			"\t[--offset-lengths s1/s2/s3/s4:l1/l2/l3/l4]\n"
+			"\t[--statistics]\n"
+			"\t[--trace-coding]\n"
+			"\t{input.bin}\n",
+			program_name
+		);
+		return EXIT_FAILURE;
+	}
+
+	input_name = *argv;
+
+	// Check extension to figure out whether it's a .PRG file
+	name_length = 0;
+
+	for(i = 0; input_name[i]; ++i) {
+		switch(input_name[i]) {
+		case '/':
+		case '\\':
+		case ':':
+			name_length = 0;
+			break;
+		case '.':
+			name_length = i;
+			break;
+		}
+	}
+
+	if(!name_length)
+		name_length = i;
+
+	// If necessary generate output file by substituting the
+	// extension for .lz
+	if(!output_name) {
+		static const char extension[] = ".lz";
+
+		output_name = alloca(name_length + sizeof extension);
+
+		memcpy(output_name, input_name, name_length);
+		memcpy(&output_name[name_length], extension, sizeof extension);
+	}
+
+	// Do the compression
+	read_input(&ctx, input_name);
+	generate_hash_table(&ctx);
+	find_matches(&ctx);
+
+	write_output(&ctx, show_trace);
+	output_write(&ctx, output_name);
+
+	// Display some statistics gathered in the process
+	if(show_stats)
+		print_statistics(&ctx, stdout);
+	return EXIT_SUCCESS;
+}
--- a/tools/external/doynamite68k/lz.sln
+++ b/tools/external/doynamite68k/lz.sln
@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 10.00
+# Visual Studio 2008
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lz", "lz.vcproj", "{6FB22E12-5802-4F3A-AD71-CE8EBC523531}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Release|Win32 = Release|Win32
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{6FB22E12-5802-4F3A-AD71-CE8EBC523531}.Debug|Win32.ActiveCfg = Debug|Win32
+		{6FB22E12-5802-4F3A-AD71-CE8EBC523531}.Debug|Win32.Build.0 = Debug|Win32
+		{6FB22E12-5802-4F3A-AD71-CE8EBC523531}.Release|Win32.ActiveCfg = Release|Win32
+		{6FB22E12-5802-4F3A-AD71-CE8EBC523531}.Release|Win32.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
--- a/tools/external/doynamite68k/lz.vcproj
+++ b/tools/external/doynamite68k/lz.vcproj
@ -0,0 +1,201 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9,00"
+	Name="lz"
+	ProjectGUID="{6FB22E12-5802-4F3A-AD71-CE8EBC523531}"
+	RootNamespace="lz"
+	Keyword="Win32Proj"
+	TargetFrameworkVersion="131072"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="Debug"
+			IntermediateDirectory="Debug"
+			ConfigurationType="1"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories=""
+				PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS"
+				MinimalRebuild="true"
+				ExceptionHandling="1"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="1"
+				UsePrecompiledHeader="0"
+				PrecompiledHeaderThrough=""
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="false"
+				DebugInformationFormat="4"
+				CompileAs="1"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies=""
+				OutputFile="lz.exe"
+				LinkIncremental="2"
+				AdditionalLibraryDirectories=""
+				IgnoreDefaultLibraryNames=""
+				GenerateDebugInformation="true"
+				ProgramDatabaseFile="$(OutDir)/lz.pdb"
+				SubSystem="1"
+				RandomizedBaseAddress="1"
+				DataExecutionPrevention="0"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="Release"
+			IntermediateDirectory="Release"
+			ConfigurationType="1"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories=""
+				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS"
+				ExceptionHandling="1"
+				RuntimeLibrary="0"
+				UsePrecompiledHeader="0"
+				PrecompiledHeaderThrough=""
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="false"
+				DebugInformationFormat="3"
+				CompileAs="1"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies=""
+				OutputFile="lz.exe"
+				LinkIncremental="1"
+				AdditionalLibraryDirectories=""
+				IgnoreAllDefaultLibraries="false"
+				IgnoreDefaultLibraryNames=""
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				RandomizedBaseAddress="1"
+				DataExecutionPrevention="0"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="src"
+			>
+			<File
+				RelativePath=".\lz.c"
+				>
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>