Descent3/libmve/mve.asm
Ryan C. Gordon 6c8977caf0
Heavy patching for compiler warnings.
The vast majority of this is fixing up `char *` that should be `const char *`
but a handful of other fixes, like potential buffer overflows that GCC
noticed, etc, were applied as well.

This removes `-Wno-write-strings` from CMakeLists.txt, as it is no longer
necessary, as there is no longer a flood of compiler warning spam when
building.

This does not fix all compiler warnings; there are still a handful, and they
are legitimate, but they can be dealt with in a future commit.
2024-04-29 00:18:56 -04:00

9636 lines
170 KiB
NASM

; Assembler setup (MASM syntax): 32-bit flat memory model, C naming and
; calling convention for every PROC declared below.
; .386
.486 ; I only need .386, but I wanted the 486 cycle timings
.MODEL FLAT, C
;;--- Types ---
; Pointer typedefs used in EXTERN, PROC parameter and LOCAL declarations.
PTRBYTE TYPEDEF PTR BYTE
PTRWORD TYPEDEF PTR WORD
PTRDWORD TYPEDEF PTR DWORD
PTRPROC TYPEDEF PTR PROC
;;--- Constants ---
; Width and height of sections in pixels.
; The LOG2_* values are used as shift counts and must stay equal to
; log2(SWIDTH) / log2(SHEIGHT) if the section size is ever changed.
SWIDTH equ 8
SHEIGHT equ 8
LOG2_SWIDTH equ 3
LOG2_SHEIGHT equ 3
;;---
EXTERN pal_tbl:BYTE ;unsigned char pal_tbl[3*256];
EXTERN pal15_tbl:WORD ;unsigned short pal15_tbl[256];
;; NextFrame working storage
; MemRec nf_mem_buf1;
; MemRec nf_mem_buf2;
EXTERN nf_buf_cur: PTRBYTE ; unsigned char* nf_buf_cur;
EXTERN nf_buf_prv: PTRBYTE ; unsigned char* nf_buf_prv;
;; NextFrame parameters
EXTERN nf_wqty: BYTE ;unsigned char nf_wqty; // (width/SWIDTH)
EXTERN nf_hqty: BYTE ;unsigned char nf_hqty; // (height/SHEIGHT)
EXTERN nf_fqty: BYTE ;unsigned char nf_fqty; // Number of fields
EXTERN nf_hicolor: DWORD ;unsigned nf_hicolor; // HiColor (0:none,1:normal,2:swapped)
;; <derived quantities>
EXTERN nf_width: DWORD ;unsigned nf_width; // wqty * SWIDTH
EXTERN nf_height: DWORD ;unsigned nf_height; // hqty * SHEIGHT;
EXTERN nf_new_line: DWORD ;unsigned nf_new_line; // width - SWIDTH
EXTERN nf_new_row0: DWORD ;unsigned nf_new_row0; // SHEIGHT*width*2-width
EXTERN nf_back_right: DWORD ;unsigned nf_back_right; // (SHEIGHT-1)*width
;; Frame parameters
;; Portion of current frame which has been updated
;; and needs to be sent to screen.
;;
EXTERN nf_new_x: DWORD ;unsigned nf_new_x;
EXTERN nf_new_y: DWORD ;unsigned nf_new_y;
EXTERN nf_new_w: DWORD ;unsigned nf_new_w;
EXTERN nf_new_h: DWORD ;unsigned nf_new_h;
.data
; Anonymous, zero-terminated copyright notice embedded in the data segment.
; No label is attached, so no code in this file refers to it.
; NOTE(review): MASM string literals have no C-style escapes, so each "\n"
; below is presumably stored as the two characters '\' and 'n' - confirm
; before relying on the text containing real line breaks.
BYTE "(c) 1997 Interplay Productions. All Rights Reserved.\n"
BYTE "This file is confidential and consists of proprietary information\n"
BYTE "of Interplay Productions. This file and associated libraries\n"
BYTE "may not, in whole or in part, be disclosed to third parties,\n"
BYTE "incorporated into any software product which is not being created\n"
BYTE "for Interplay Productions, copied or duplicated in any form,\n"
BYTE "without the prior written permission of Interplay Productions.\n"
BYTE "Further, you may not reverse engineer, decompile or otherwise\n"
BYTE "attempt to derive source code of this material.\n",0
.code
; NF_DECOMP_INIT HI_COLOR_FLAG
;
; Common prologue for the section decompressors.  HI_COLOR_FLAG is 0 for
; one byte per pixel and 1 for two bytes per pixel.
;
; Requires from the enclosing PROC:
;   parameters  x, y, w, h                 (in sections)
;   locals      tbuf, new_row, DiffBufPtrs
; Writes the globals nf_new_x, nf_new_w, nf_new_y, nf_new_h.
; On exit:  ebx = nf_fqty zero-extended to 32 bits (callers rely on this),
;           eax and edx are scratch.
NF_DECOMP_INIT MACRO HI_COLOR_FLAG: REQ
LOCAL need_offset, at_origin
	mov	ax, ds			; Insure es==ds for symantec flat mode
	mov	es, ax

	movzx	ebx, nf_fqty		; ebx = nf_fqty (convert to 32-bits)

	mov	eax, nf_buf_prv		; DiffBufPtrs = nf_buf_prv - nf_buf_cur
	sub	eax, nf_buf_cur
	mov	DiffBufPtrs, eax

	;; Horizontal extent, in bytes
	mov	eax, w			; nf_new_w = w*SWIDTH*2^HI_COLOR_FLAG
	shl	eax, LOG2_SWIDTH+HI_COLOR_FLAG
	mov	nf_new_w, eax
	neg	eax			; new_row = nf_new_row0 - nf_new_w
	add	eax, nf_new_row0
	mov	new_row, eax

	mov	eax, x			; nf_new_x = x*SWIDTH*2^HI_COLOR_FLAG
	shl	eax, LOG2_SWIDTH+HI_COLOR_FLAG
	mov	nf_new_x, eax

	;; Vertical extent, in lines
	mov	eax, y			; nf_new_y = y*nf_fqty*SHEIGHT
	shl	eax, LOG2_SHEIGHT
	mul	ebx
	mov	nf_new_y, eax

	mov	eax, h			; nf_new_h = h*nf_fqty*SHEIGHT
	shl	eax, LOG2_SHEIGHT
	mul	ebx
	mov	nf_new_h, eax

	;; Starting address inside the current buffer
	mov	eax, nf_buf_cur		; tbuf = nf_buf_cur
	mov	tbuf, eax
	cmp	x, 0			; if (x || y)
	jne	need_offset
	cmp	y, 0
	je	at_origin
need_offset:
	mov	eax, nf_new_y		;   tbuf += nf_new_y*nf_width + nf_new_x
	mul	nf_width
	add	eax, nf_new_x
	add	tbuf, eax
at_origin:
ENDM ; DECOMP_INIT
; DECOMP_BODY HI_COLOR_FLAG
;
; Shared body of nfDecomp (HI_COLOR_FLAG=0) and nfHiColorDecomp (=1).
; Requires from the enclosing PROC: parameters comp, x, y, w, h and locals
; tbuf, new_row, DiffBufPtrs, parms_sz.
;
; Layout of comp as consumed here:
;   parms_sz bytes of 16-bit section params (one word per section, repeated
;   for each of the nf_fqty fields), followed by raw pixel data for every
;   section whose param word is zero.
;
; Pass 1 (labels ns_*): param == 0 -> copy one section from the data stream.
; Pass 2 (labels ms_*): param != 0 -> copy one section from another place:
;   positive word     : from the current buffer,  offset (word-4000h)*scale
;   negative word     : from the previous buffer, offset (word-0C000h)*scale
; Only the low bytes of w and h are used as loop counters.
; Registers written: eax, ebx, ecx, edx, esi, edi.
DECOMP_BODY MACRO HI_COLOR_FLAG:REQ
LOCAL HI_COLOR_SCALE
; Bytes per pixel: 1 when HI_COLOR_FLAG is 0, 2 when it is 1.
HI_COLOR_SCALE equ HI_COLOR_FLAG+1
NF_DECOMP_INIT HI_COLOR_FLAG
; ebx still holds nf_fqty, left there by NF_DECOMP_INIT.
mov eax, w ; parms_sz = (w*h*nf_fqty)<<1
mul h
mul ebx ;nf_fqty
shl eax, 1
mov parms_sz, eax
; esi indexes comp (to get new section data)
; edi indexes current screen buffer
; edx is a frequently used constant
; ebx indexes section params
mov edi, tbuf
mov edx, nf_new_line ; width - SWIDTH
mov ebx, comp ; Parms index
mov esi, ebx
add esi, parms_sz ; Skip over the section params (parms_sz bytes)
; Iterate over params and copy new hires data to appropriate sections.
; Loop nesting: fields (cl, saved on the stack) / rows (ch) / columns (cl).
mov cl, nf_fqty
ns_0f: push ecx
push edi
mov ch, byte ptr h
ns_0: mov cl, byte ptr w
ns_1: cmp word ptr [ebx],0
je ns_10
; Non-zero param: not handled in this pass, just step to the next section.
add edi, SWIDTH*HI_COLOR_SCALE
ns_2: add ebx, 2
dec cl
jnz ns_1
add edi, new_row ; SHEIGHT*width - SWIDTH*w
dec ch
jnz ns_0
; Next field starts one line below the start of this field.
pop edi
pop ecx
add edi, nf_width
dec cl
jnz ns_0f
jmp ns_99
; Copy new data to one section
; Enter with esi pointing to source data, edi to screen section.
; Assumes SWIDTH=8 (16-bit data) and SHEIGHT=8
; Each row is 2*HI_COLOR_SCALE dwords; rows 1..7 are followed by a step of
; edx bytes to the next line, the 8th row is not.
ns_10:
REPEAT 7
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
add edi, edx
ENDM
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
; Return edi to the top line; it is left at the start of the next section.
sub edi, nf_back_right ; (SHEIGHT-1)*width
jmp ns_2
ns_99:
; Iterate over flags and motion source addresses from params
; to determine which sections to move.
; ebx indexes params.
; esi indexes source from buffer
; esi will be computed as +- 16K relative to edi.
sub ebx, parms_sz ; Move back to start of section parms
mov edi, tbuf
mov cl, nf_fqty
xor esi, esi
; Scan loop A.  esi is zero on entry to ms_1, so "or si,[ebx]" both loads
; the param word into si and sets the sign/zero flags for the branches.
ms_0f: push ecx
push edi
mov ch, byte ptr h
ms_0: mov cl, byte ptr w
ms_1: or si, [ebx]
jg ms_10
jl ms_j30
; Zero param: section was filled in pass 1, skip it.
add edi, SWIDTH*HI_COLOR_SCALE
ms_2: add ebx, 2
dec cl
jnz ms_1
add edi, new_row ; SHEIGHT*width - SWIDTH*w
dec ch
jnz ms_0
pop edi
pop ecx
add edi, nf_width
dec cl
jnz ms_0f
jmp ms_99
; Trampoline to ms_30.
ms_j30: jmp ms_30
; Move one section from current screen to current screen.
; Enter with
; edi pointing to destination screen section,
; relative value of source offset in esi.
; The following assumes SWIDTH==8 and SHEIGHT==8
ms_10: ; Make esi absolute
lea esi, [esi*HI_COLOR_SCALE-04000h*HI_COLOR_SCALE+edi]
REPEAT 7
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
add esi, edx
add edi, edx
ENDM
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
sub edi, nf_back_right ; (SHEIGHT-1)*width
xor esi, esi ; Reset esi to zero
jmp ms_2
; Scan loop B: same loop as A (identical push/pop shape, so the two can
; hand over to each other mid-row), but it tests for the previous-buffer
; case first.  It is entered at ms_22 after a previous-buffer move, while
; a current-buffer move always resumes in loop A at ms_2.
ms_20f: push ecx
push edi
mov ch, byte ptr h
ms_20: mov cl, byte ptr w
ms_21: or si, [ebx]
jl ms_30
jg ms_j10
add edi, SWIDTH*HI_COLOR_SCALE
ms_22: add ebx, 2
dec cl
jnz ms_21
add edi, new_row ; SHEIGHT*width - SWIDTH*w
dec ch
jnz ms_20
pop edi
pop ecx
add edi, nf_width
dec cl
jnz ms_20f
jmp ms_99
; Trampoline to ms_10.
ms_j10: jmp ms_10
; Move one section from previous screen to current screen.
; Enter with
; edi pointing to destination screen section,
; relative value of source offset in esi.
; The following assumes SWIDTH==8 and SHEIGHT==8
ms_30: ; Make esi absolute
lea esi, [esi*HI_COLOR_SCALE-0C000h*HI_COLOR_SCALE+edi]
add esi, DiffBufPtrs ; and point to other buffer
REPEAT 7
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
add esi, edx
add edi, edx
ENDM
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
sub edi, nf_back_right ; (SHEIGHT-1)*width
xor esi, esi ; Reset esi to zero
jmp ms_22
ms_99:
ENDM ; DECOMP_BODY
; Non-HiColor versions
;
; void nfDecomp(unsigned char *comp,
;               unsigned x, unsigned y, unsigned w, unsigned h)
;
; Decompresses comp into the rectangle of the current buffer given by
; x, y, w, h, all measured in SWIDTHxSHEIGHT (8x8) sections.
; When nf_hicolor is non-zero the call is forwarded unchanged to
; nfHiColorDecomp; otherwise the one-byte-per-pixel body runs here.
;
nfDecomp PROC USES ESI EDI EBX, comp:PTRBYTE, x:DWORD, y:DWORD, w:DWORD, h:DWORD
	LOCAL	tbuf:PTRBYTE, new_row:DWORD, DiffBufPtrs:DWORD, parms_sz:DWORD

	cmp	nf_hicolor, 0
	je	nfd_not_hicolor
	INVOKE	nfHiColorDecomp, comp, x, y, w, h
	ret

nfd_not_hicolor:
	DECOMP_BODY 0			; Not HiColor
	ret
nfDecomp ENDP
; void nfHiColorDecomp(unsigned char *comp,
;                      unsigned x, unsigned y, unsigned w, unsigned h)
;
; Two-bytes-per-pixel counterpart of nfDecomp: decompresses comp into the
; rectangle of the current buffer given by x, y, w, h, all measured in
; SWIDTHxSHEIGHT (8x8) sections.
;
nfHiColorDecomp PROC USES ESI EDI EBX, comp:PTRBYTE, x:DWORD, y:DWORD, w:DWORD, h:DWORD
	LOCAL	tbuf:PTRBYTE, new_row:DWORD, DiffBufPtrs:DWORD, parms_sz:DWORD

	DECOMP_BODY 1			; HiColor
	ret
nfHiColorDecomp ENDP
; DECOMP_CHG_BODY HI_COLOR_FLAG
;
; Shared body of nfDecompChg (HI_COLOR_FLAG=0) and nfHiColorDecompChg (=1).
; Requires from the enclosing PROC: parameters chgs, parms, comp, x, y, w, h
; and locals tbuf, new_row, DiffBufPtrs, pChgs.
;
; Like DECOMP_BODY, but a bit stream (chgs) selects which sections have a
; param word at all.  For every section ax is doubled ("add ax,ax"):
;   no carry, result non-zero -> section unchanged, skip it (no param used)
;   result zero               -> fetch the next 16-bit word from pChgs, retry
;   carry, result non-zero    -> section has a param word at [ebx]
; NOTE(review): the "result zero" reload implies that every chgs word ends
; with a marker bit below its flag bits - presumably; confirm with the encoder.
; ebx advances only past params of sections that were selected.
;
; Pass 1 (ns_*): selected sections with param == 0 are copied from comp.
; Pass 2 (ms_*): selected sections with param != 0 are moved from the
;   current buffer (positive word, offset (word-4000h)*scale) or from the
;   previous buffer (negative word, offset (word-0C000h)*scale).
; Only the low bytes of w and h are used as loop counters.
; Registers written: eax, ebx, ecx, edx, esi, edi.
DECOMP_CHG_BODY MACRO HI_COLOR_FLAG:REQ
LOCAL HI_COLOR_SCALE
; Bytes per pixel: 1 when HI_COLOR_FLAG is 0, 2 when it is 1.
HI_COLOR_SCALE equ HI_COLOR_FLAG+1
NF_DECOMP_INIT HI_COLOR_FLAG
; esi indexes comp (to get new section data)
; edi indexes current screen buffer
; edx is a frequently used constant
; ebx indexes section params
mov edi, tbuf
mov edx, nf_new_line ; width - SWIDTH
mov esi, comp
mov ebx, parms
; Iterate over params and copy new hires data to appropriate sections.
mov eax, chgs
mov pChgs, eax
; ax = 0 forces a fetch of the first chgs word on the first section.
mov eax, 0
mov cl, nf_fqty
ns_0f: push ecx
push edi
mov ch, byte ptr h
ns_0: mov cl, byte ptr w
ns_1: add ax, ax
ja ns_1b
jz ns_5
cmp word ptr [ebx],0
je ns_10
; Selected, but param is non-zero: left for pass 2; consume the param.
add ebx, 2
ns_1b: add edi, SWIDTH*HI_COLOR_SCALE
ns_2: dec cl
jnz ns_1
add edi, new_row ; SHEIGHT*width - SWIDTH*w
dec ch
jnz ns_0
pop edi
pop ecx
add edi, nf_width
dec cl
jnz ns_0f
jmp ns_99
; Fetch the next word of change bits; only ax is meaningful afterwards.
ns_5: mov eax, pChgs
add pChgs, 2
mov ax, [eax]
jmp ns_1
; Copy new data to one section
; Enter with esi pointing to source data, edi to screen section.
; Assumes SWIDTH=8 (16-bit data) and SHEIGHT=8
ns_10:
REPEAT 7
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
add edi, edx
ENDM
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
sub edi, nf_back_right ; (SHEIGHT-1)*width
add ebx, 2
jmp ns_2
ns_99:
; Iterate over flags and motion source addresses from params
; to determine which sections to move.
; ebx indexes params.
; esi indexes source from buffer
; esi will be computed as +- 16K relative to edi.
; Restart the chgs bit stream and the param list from the beginning.
mov edi, tbuf
mov ebx, parms
mov eax, chgs
mov pChgs, eax
mov eax, 0
mov cl, byte ptr nf_fqty
xor esi, esi
; Scan loop A.  esi is zero on entry to the "or", so "or si,[ebx]" loads
; the param word into si and sets the flags tested by jg/jl.
ms_0f: push ecx
push edi
mov ch, byte ptr h
ms_0: mov cl, byte ptr w
ms_1: add ax, ax
ja ms_1b
jz ms_5
or si, [ebx]
jg ms_10
jl ms_j30
; Zero param: section was filled in pass 1; consume the param.
add ebx, 2
ms_1b: add edi, SWIDTH*HI_COLOR_SCALE
ms_2: dec cl
jnz ms_1
add edi, new_row ; SHEIGHT*width - SWIDTH*w
dec ch
jnz ms_0
pop edi
pop ecx
add edi, nf_width
dec cl
jnz ms_0f
jmp ms_99
ms_5: mov eax, pChgs
add pChgs, 2
mov ax, word ptr [eax]
jmp ms_1
; Trampoline to ms_30.
ms_j30: jmp ms_30
; Move one section from current screen to current screen.
; Enter with
; edi pointing to destination screen section,
; relative value of source offset in esi.
; The following assumes SWIDTH==8 and SHEIGHT==8
ms_10: ; Make esi absolute
lea esi, [esi*HI_COLOR_SCALE-04000h*HI_COLOR_SCALE+edi]
REPEAT 7
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
add esi, edx
add edi, edx
ENDM
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
sub edi, nf_back_right ; (SHEIGHT-1)*width
xor esi, esi ; Reset esi to zero
add ebx, 2
jmp ms_2
; Scan loop B: same loop as A with the same push/pop shape, but it tests
; for the previous-buffer case first.  It is entered at ms_22 after a
; previous-buffer move; a current-buffer move resumes in loop A at ms_2.
ms_20f: push ecx
push edi
mov ch, byte ptr h
ms_20: mov cl, byte ptr w
ms_21: add ax, ax
ja ms_21b
jz ms_25
or si, [ebx]
jl ms_30
jg ms_j10
add ebx, 2
ms_21b: add edi, SWIDTH*HI_COLOR_SCALE
ms_22: dec cl
jnz ms_21
add edi, new_row ; SHEIGHT*width - SWIDTH*w
dec ch
jnz ms_20
pop edi
pop ecx
add edi, nf_width
dec cl
jnz ms_20f
jmp ms_99
ms_25: mov eax, pChgs
add pChgs, 2
mov ax, [eax]
jmp ms_21
; Trampoline to ms_10.
ms_j10: jmp ms_10
; Move one section from previous screen to current screen.
; Enter with
; edi pointing to destination screen section,
; relative value of source offset in esi.
; The following assumes SWIDTH==8 and SHEIGHT==8
ms_30: ; Make esi absolute
lea esi, [esi*HI_COLOR_SCALE-0C000h*HI_COLOR_SCALE+edi]
add esi, DiffBufPtrs ; and point to other buffer
REPEAT 7
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
add esi, edx
add edi, edx
ENDM
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
sub edi, nf_back_right ; (SHEIGHT-1)*width
add ebx, 2
xor esi, esi ; Reset esi to zero
jmp ms_22
ms_99:
ENDM ; DECOMP_CHG_BODY
; void nfDecompChg(unsigned short *chgs,
;                  unsigned short *parms,
;                  unsigned char *comp,
;                  unsigned x, unsigned y, unsigned w, unsigned h)
;
; Decompresses into the rectangle of the current buffer given by
; x, y, w, h, all measured in SWIDTHxSHEIGHT (8x8) sections.
;   chgs   selects which squares are updated
;   parms  holds the motion params for the squares being updated
;   comp   supplies the pixel data for squares that carry new data
; When nf_hicolor is non-zero the call is forwarded unchanged to
; nfHiColorDecompChg; otherwise the one-byte-per-pixel body runs here.
;
nfDecompChg PROC USES ESI EDI EBX, chgs:PTRWORD, parms:PTRWORD, comp:PTRBYTE, x:DWORD, y:DWORD, w:DWORD, h:DWORD
	LOCAL	tbuf:PTRBYTE, new_row:DWORD, DiffBufPtrs:DWORD, pChgs:PTRBYTE

	cmp	nf_hicolor, 0
	je	nfdc_not_hicolor
	INVOKE	nfHiColorDecompChg, chgs, parms, comp, x, y, w, h
	ret

nfdc_not_hicolor:
	DECOMP_CHG_BODY 0		; Not HiColor
	ret
nfDecompChg ENDP
; void nfHiColorDecompChg(unsigned short *chgs,
;                         unsigned short *parms,
;                         unsigned char *comp,
;                         unsigned x, unsigned y, unsigned w, unsigned h)
;
; Two-bytes-per-pixel counterpart of nfDecompChg: decompresses into the
; rectangle of the current buffer given by x, y, w, h, all measured in
; SWIDTHxSHEIGHT (8x8) sections.
;   chgs   selects which squares are updated
;   parms  holds the motion params for the squares being updated
;   comp   supplies the pixel data for squares that carry new data
;
nfHiColorDecompChg PROC USES ESI EDI EBX, chgs:PTRWORD, parms:PTRWORD, comp:PTRBYTE, x:DWORD, y:DWORD, w:DWORD, h:DWORD
	LOCAL	tbuf:PTRBYTE, new_row:DWORD, DiffBufPtrs:DWORD, pChgs:PTRBYTE

	DECOMP_CHG_BODY 1		; HiColor
	ret
nfHiColorDecompChg ENDP
.data
; luminance table for palette entries (256 dwords, zero-initialised)
lum_tbl DWORD 256 DUP (0)
; signed 8-bit y * nf_width
; Filled in by nfPkConfig: entries 0..127 hold i*nf_width and entries
; 128..255 hold (i-256)*nf_width, so the table is indexed by an unsigned
; byte that is interpreted as a signed line count.
nfpk_ShiftY DWORD 256 DUP (0)
; Constant tables
; 8-bit -8:7 x nf_width + -8:7
; 256 entries of two signed bytes (x, y); x varies fastest, so the entry
; for a given pair sits at index (y+8)*16 + (x+8).
nfpk_ShiftP1 LABEL WORD
FOR y, <-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7>
FOR x, <-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7>
BYTE x,y
ENDM
ENDM
; 8-bit to right and below in roughly 0:14*nf_width + -14:14 (-3 cases)
; negative is
; 8-bit to left and above in roughly -14:0*nf_width + -14:14 (-3 cases)
; 256 entries of two signed bytes (x, y), built in three groups:
;   y = 0..7,  x = 8..14      ( 56 entries)
;   y = 8..13, x = -14..14    (174 entries)
;   y = 14,    x = -14..11    ( 26 entries)
nfpk_ShiftP2 LABEL WORD
FOR y, <0,1,2,3,4,5,6,7>
FOR x, <8,9,10,11,12,13,14>
BYTE x,y
ENDM
ENDM
FOR y, <8,9,10,11,12,13>
FOR x, <-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1>
BYTE x,y
ENDM
FOR x, <0,1,2,3,4,5,6,7,8,9,10,11,12,13,14>
BYTE x,y
ENDM
ENDM
FOR x, <-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1>
BYTE x,14
ENDM
FOR x, <0,1,2,3,4,5,6,7,8,9,10,11>
BYTE x,14
ENDM
; The three tables below each hold one dword per combination of register
; choices.  The byte values have the form of x86 ModR/M bytes with mod=11
; (0C0h+rm selects destination ax/al, 0E0h+rm selects ah), matching the
; "mov ..." notes on each list.
; NOTE(review): presumably these bytes are patched into instruction
; templates by decoders outside this part of the file - confirm there.
;
; nfpk_mov4l: 2 choices per position, 2^4 = 16 entries.
; m1 is the innermost loop, so it varies fastest; bytes are stored in the
; order m2,m1,m4,m3.
nfpk_mov4l LABEL DWORD
; mov ax, bx,cx
MOV4L_REGS TEXTEQU <!<0c0h+3,0c0h+1!>>
%FOR m4, MOV4L_REGS
% FOR m3, MOV4L_REGS
% FOR m2, MOV4L_REGS
% FOR m1, MOV4L_REGS
BYTE m2,m1,m4,m3
ENDM
ENDM
ENDM
ENDM
; nfpk_mov8: 4 choices per position, 4^4 = 256 entries, same byte order
; (m2,m1,m4,m3) and loop nesting as nfpk_mov4l.
nfpk_mov8 LABEL DWORD
; mov ax, bx/dx/cx/bp
MOV8_REGS TEXTEQU <!<0c0h+3,0c0h+2,0c0h+1,0c0h+5!>>
%FOR m4, MOV8_REGS
% FOR m3, MOV8_REGS
% FOR m2, MOV8_REGS
% FOR m1, MOV8_REGS
BYTE m2,m1,m4,m3
ENDM
ENDM
ENDM
ENDM
; nfpk_mov4: 256 entries.  Positions m1/m3 draw from the "al" list and
; m2/m4 from the "ah" list; note the stored byte order here is m3,m4,m1,m2.
nfpk_mov4 LABEL DWORD
; mov al, bl/bh/cl/ch
MOV4_REGS0 TEXTEQU <!<0c0h+3,0c0h+7,0c0h+1,0c0h+5!>>
; mov ah, bl/bh/cl/ch
MOV4_REGS1 TEXTEQU <!<0e0h+3,0e0h+7,0e0h+1,0e0h+5!>>
%FOR m4, MOV4_REGS1
% FOR m3, MOV4_REGS0
% FOR m2, MOV4_REGS1
% FOR m1, MOV4_REGS0
BYTE m3,m4,m1,m2
ENDM
ENDM
ENDM
ENDM
.code
; nfPkConfig
;
; Rebuilds the tables used by nfPkDecomp that depend on the screen size.
; Currently that is nfpk_ShiftY, which maps a byte, read as a signed line
; count, to a byte offset:
;   nfpk_ShiftY[i] = i       * nf_width   for i =   0..127
;   nfpk_ShiftY[i] = (i-256) * nf_width   for i = 128..255
; Takes no arguments; reads nf_width.  eax and ecx are scratch.
nfPkConfig PROC USES ESI EDI EBX
	mov	ebx, nf_width		; ebx = bytes per line (the step)
	xor	ecx, ecx		; ecx = table index, runs 0..255

	; Non-negative half: 0, width, 2*width, ... 127*width
	xor	eax, eax
shift_pos:
	mov	nfpk_ShiftY[ecx*4], eax
	add	eax, ebx
	inc	ecx
	cmp	ecx, 128
	jb	shift_pos

	; Negative half: -128*width, -127*width, ... -1*width
	mov	eax, ebx
	shl	eax, 7
	neg	eax
shift_neg:
	mov	nfpk_ShiftY[ecx*4], eax
	add	eax, ebx
	inc	ecx
	cmp	ecx, 256
	jb	shift_neg

	ret
nfPkConfig ENDP
EXTERN sf_LineWidth: DWORD ;unsigned sf_LineWidth; // Distance between lines in memory
; Banked screen parameters
EXTERN sf_SetBank: PTRPROC ;unsigned long sf_SetBank;
EXTERN sf_WinGran: DWORD ;unsigned sf_WinGran;
EXTERN sf_WinSize: DWORD ;unsigned long sf_WinSize;
EXTERN sf_WinGranPerSize: DWORD ;unsigned sf_WinGranPerSize;
;{sf_WriteWinPtr and sf_WriteWinLimit replace sf_WriteWinSeg, see mveliba.asm}
EXTERN sf_WriteWinPtr: PTRBYTE ;unsigned char *sf_WriteWinPtr;
EXTERN sf_WriteWinLimit: PTRBYTE ;unsigned char *WriteWinLimit;
EXTERN sf_WriteWin: DWORD ;unsigned sf_WriteWin;
EXTERN opt_hscale_step: DWORD
EXTERN opt_hscale_adj: DWORD
;void mve_ShowFrameField(
; unsigned char *buf, unsigned bufw, unsigned bufh,
; unsigned sx, unsigned sy, unsigned w, unsigned h,
; unsigned dstx, unsigned dsty, unsigned field)
;
; Copies a w-byte by h-line rectangle from buf to the screen window at
; sf_WriteWinPtr.
;   buf, bufw   source buffer and its line pitch in bytes
;   bufh        not referenced by this routine
;   sx, sy      origin of the rectangle in buf; also added to dstx/dsty
;   w, h        rectangle size; each line is copied as w>>2 dwords, so the
;               low two bits of w are ignored
;   dstx, dsty  destination position before sx/sy are added
;   field       non-zero doubles the destination line step; an odd value
;               also starts one sf_LineWidth further down
; Two paths: sf_SetBank==0 writes straight through the window pointer;
; otherwise the destination is addressed through a banked window and
; sf_SetBank is called whenever the write position leaves the window.
; When opt_hscale_step==3 the copy stretches horizontally: each dword
; written advances the source by only 3 bytes.
mve_ShowFrameField PROC USES ESI EDI EBX, buf:PTRBYTE, bufw:DWORD, bufh:DWORD, sx:DWORD, sy:DWORD, w:DWORD, h:DWORD, dstx:DWORD, dsty:DWORD, field:DWORD
LOCAL bank:DWORD
LOCAL w4:DWORD
LOCAL new_src_line:DWORD
LOCAL linestep:DWORD
LOCAL new_dst_line:DWORD
mov ax, ds ; Insure es==ds for symantec flat mode
mov es, ax
mov eax, w ; w4 = w>>2
shr eax, 2
mov w4, eax
;;; <WIP>
;;; In stretched width mode, we either keep 4/5 (a) of the source pixels,
;;; or duplicate every fourth pixel to magnify by 5/4 (b).
;;; In these cases, new_src_line is either bufw-w*5/4 (a) or bufw-w*4/5 (b).
;;; Let ScaleStep be 5 (a) or 3 (b) instead of 4. This is the amount to advance
;;; the source after copying 32-bits from source to destination.
;;; The coordinate system used for the source will be a simulated scaled system.
;;; Rather than scale height, I plan to use alternate vertical resolutions. However,
;;; it might be a good idea to also provide for scaled height in case we want a
;;; higher resolution border.
;;; Question: Do we still need to support transferring subrectangles?
.if opt_hscale_step==4
mov eax, bufw ; new_src_line = bufw - w
sub eax, w
mov new_src_line, eax
.else
; Scaled modes take the end-of-line source adjustment from opt_hscale_adj.
mov eax, opt_hscale_adj
mov new_src_line, eax
.endif
mov eax, sf_LineWidth ; linestep = sf_LineWidth;
.if field ; if (field)
add eax, eax ; linestep <<= 1;
.endif
mov linestep, eax
sub eax, w ; new_dst_line = linestep - w;
mov new_dst_line, eax
mov eax, sy ; buf += sy*bufw + sx
mul bufw
add eax, sx
add buf, eax
mov eax, sx ; dstx += sx
add dstx, eax
; <WIP> This is a hack. We should pass in src x,y of origin
; or make dstx/dsty absolute.
;
mov eax, bufw ; if (field && sx >= (bufw>>1)
shr eax, 1
.if field && sx >= eax
sub dstx, eax ; dstx -= bufw>>1
.endif
mov eax, sy ; dsty += sy
add dsty, eax
.if sf_SetBank==0 ;------------------
; dst = WriteWinPtr + (dsty*linestep+dstx)
mov edi, sf_WriteWinPtr
mov eax, dsty
mul linestep
add eax, dstx
add edi, eax
.if field & 1
add edi, sf_LineWidth;
.endif
; eax/edx = per-line source/destination adjustments, ebx = lines left.
mov eax, new_src_line
mov edx, new_dst_line
mov esi, buf
mov ebx, h
.if opt_hscale_step==3
; edi is biased by -8 so the loop below can use [edi+8]/[edi+12] before
; and [edi]/[edi+4] after its "add edi,16"; the bias is undone at the end.
sub edi, 8
sf_lp2a:mov ecx, w4
shr ecx, 2
ALIGN 4
; Per iteration: 12 source bytes become 16 destination bytes (four
; overlapping dword reads at 3-byte spacing).
; NOTE(review): there is no guard for w4>>2 == 0; in that case "dec ecx"
; would wrap and the loop would run far past the line - confirm callers
; never pass such a width.
sf_lp2b:mov eax, [esi]
mov [edi+8], eax
mov eax, [esi+3]
mov [edi+12], eax
add edi, 16
mov eax, [esi+6]
mov [edi], eax
mov eax, [esi+9]
mov [edi+4], eax
add esi, 12
dec ecx
jnz sf_lp2b
; To avoid problem of last pixel coming from next line
; with arrange for w%16==12, so here is where we copy
; last 12 pixels.
mov eax, [esi]
mov [edi+8], eax
mov eax, [esi+3]
mov [edi+12], eax
add edi, 12
mov eax, [esi+6]
mov [edi+4], eax
add esi, 9
; eax was used as scratch above, so the adjustment is re-read from memory.
add esi, new_src_line
add edi, edx
dec ebx
jnz sf_lp2a
add edi, 8
.else
sf_lp: mov ecx, w4 ;width/4
rep movsd
add esi, eax
add edi, edx
dec ebx
jnz sf_lp
.endif
.else ; sf_SetBank ;------------------
mov esi, buf
; start = dsty * linestep + dstx
mov eax, linestep
mul dsty
.if field & 1
add eax, sf_LineWidth
.endif
add eax, dstx
; bank = start / WinGran
; dst = (start % WinGran) + sf_WriteWinPtr
mov edx, 0
div sf_WinGran
mov bank, eax
mov edi, edx
add edi, sf_WriteWinPtr
; Select new bank
; Passed to sf_SetBank in registers: bx = window number, edx = bank.
mov bh, 0
mov bl, byte ptr sf_WriteWin
mov edx, bank
call sf_SetBank
; eax/edx destroyed by sf_SetBank
sf_0: ; rem = sf_WriteWinLimit - dst
mov eax, sf_WriteWinLimit
sub eax, edi
; h2 = (rem+(linestep-w))/linestep
add eax, linestep
sub eax, w
mov edx, 0
div linestep
; if (h<h2) h2=h
cmp h, eax
jae skplim
mov eax, h
skplim: ; if (h2==0) // No full lines can be transfered.
or eax, eax
jz sf_2
; h-= h2
; Transfer h2 lines to screen
sub h, eax
mov ebx, new_src_line
mov edx, new_dst_line
.if opt_hscale_step==3
; Same stretched copy as in the unbanked path, with ebx as the scratch
; register and eax as the line counter.
sub edi, 8
sf_1a3: mov ecx, w4
shr ecx, 2
ALIGN 4
sf_1b3: mov ebx, [esi]
mov [edi+8], ebx
mov ebx, [esi+3]
mov [edi+12], ebx
add edi, 16
mov ebx, [esi+6]
mov [edi], ebx
mov ebx, [esi+9]
mov [edi+4], ebx
add esi, 12
dec ecx
jnz sf_1b3
; To avoid problem of last pixel coming from next line
; with arrange for w%16==12, so here is where we copy
; last 12 pixels.
mov ebx, [esi]
mov [edi+8], ebx
mov ebx, [esi+3]
mov [edi+12], ebx
add edi, 12
mov ebx, [esi+6]
mov [edi+4], ebx
add esi, 9
add esi, new_src_line
add edi, edx
dec eax
jnz sf_1a3
add edi, 8
.else
sf_1: mov ecx, w4 ; width/4
rep movsd
add esi, ebx
add edi, edx
dec eax
jnz sf_1
.endif
sf_2: ; if (h!=0) // There are still lines to be transfered
; // transfer partial line
; eax is zero on every path that reaches this point, so the "or" simply
; tests h.
or eax, h
jz sf_9
; w4a = 0 max (sf_WriteWinLimit-dst)/4
mov ecx, sf_WriteWinLimit
sub ecx, edi
sar ecx, 2
jns sf_2b
mov ecx, 0
sf_2b: push ecx ; Save size of first half
.if opt_hscale_step==3
ALIGN 4
or ecx, ecx
jz sf_2c3
; Stretched copy, one dword at a time: 4 bytes written per 3 bytes read.
sf_2b3: mov eax, [esi]
mov [edi], eax
add esi, 3
add edi, 4
dec ecx
jnz sf_2b3
sf_2c3:
.else
rep movsd
.endif
; bank += WinSize/WinGran //Assumes WinSize%WinGran==0
; off -= (WinSize/WinGran)*WinGran == WinSize
mov eax, sf_WinGranPerSize
add bank, eax
sub edi, sf_WinSize
; Select new bank
mov bh, 0
mov bl, byte ptr sf_WriteWin
mov edx, bank
call sf_SetBank
; eax/edx destroyed by sf_SetBank
; w4b = w4-w4a // Size of second half
pop eax ; Size of first half
mov ecx, w4
sub ecx, eax ; Size of 2nd half
.if opt_hscale_step==3
ALIGN 4
or ecx, ecx
jz sf_8b3
sf_8a3: mov eax, [esi]
mov [edi], eax
add esi, 3
add edi, 4
dec ecx
jnz sf_8a3
sf_8b3:
.else
rep movsd
.endif
add esi, new_src_line
add edi, new_dst_line
;; --h // Count split line
;; if (h!=0) continue
dec h
jnz sf_0
sf_9:
.endif ; sf_SetBank==0 ;------------------
ret
mve_ShowFrameField ENDP
nfHPkDecomp PROC USES ESI EDI EBX, \
ops:PTRBYTE, comp:PTRBYTE, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD
LOCAL tbuf: PTRBYTE
LOCAL new_row:DWORD
LOCAL DiffBufPtrs:DWORD
LOCAL nfpk_back_right: DWORD
LOCAL wcnt:DWORD
LOCAL bcomp:PTRBYTE
LOG_LABEL "StartPkDecomp"
.data
nfhpk_OpTbl label dword
dword offset nf0 ; Prev Same (0)
dword offset nf1 ; No change (and copied to screen) (0)
dword offset nf2 ; Near shift from older part of current buf (1)
dword offset nf3 ; Near shift from newer part of current buf (1)
dword offset nf4 ; Near shift from previous buffer (1)
dword offset nf5 ; Far shift from previous buffer (2)
dword offset nf6 ; Far shift from current buffer (2)
; [Or if COMPOPS, run of no changes (0)]
dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 4x8x2 (24 bytes)
dword offset nf11 ; 8x8x8 (64 bytes)
dword offset nf12 ; low 4x4x8 (16 bytes)
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
dword offset nf14 ; 8x8x0 (1 byte)
dword offset nf15 ; mix 8x8x0 (2 bytes)
.code
NF_DECOMP_INIT 1
mov eax, nf_back_right
sub eax, SWIDTH*2
mov nfpk_back_right, eax
mov esi, comp
mov edi, tbuf
xor eax, eax
mov ax, [esi]
add eax, esi
mov bcomp, eax
add esi, 2
nf_StartRow:
mov eax, w
shr eax, 1
mov wcnt,eax
ALIGN 4
nf_NextPair:
dec wcnt
js nf_NextRow
mov ebx, ops
mov al, [ebx]
inc ebx
mov ops, ebx
xor ebx, ebx
mov bl, al
shr bl, 4
and eax, 0Fh
push offset nf_NextPair
push nfhpk_OpTbl[ebx*4]
jmp nfhpk_OpTbl[eax*4]
nf_NextRow:
add edi, new_row
dec h
jnz nf_StartRow
LOG_LABEL "EndPkDecomp"
ret
;----------------------------------------
ALIGN 4
nf0: ; No change from previous buffer
mov eax, DiffBufPtrs
jmp nf_shift
;----------------------------------------
ALIGN 4
nf1: ; No change (and copied to screen)
if 0 ;debug
mov ebx, 0
jmp nf_solid
endif
add edi, SWIDTH*2
retn
;----------------------------------------
ALIGN 4
nf2: ; Near shift from older part of current buffer
xor eax, eax
mov ebx, bcomp
inc bcomp
mov al, [ebx]
mov ax, nfpk_ShiftP2[eax*2]
nf_xyc_shift:
xor ebx, ebx
mov bl, ah
shl eax, 24
sar eax, 24-1
add eax, nfpk_ShiftY[ebx*4]
jmp nf_shift
;----------------------------------------
ALIGN 4
nf3: ; Near shift from newer part of current buffer
xor eax, eax
mov ebx, bcomp
inc bcomp
mov al, [ebx]
mov ax, nfpk_ShiftP2[eax*2]
neg al
neg ah
jmp nf_xyc_shift
;----------------------------------------
ALIGN 4
nf4: ; Near shift from previous buffer
xor eax, eax
mov ebx, bcomp
inc bcomp
mov al, [ebx]
mov ax, nfpk_ShiftP1[eax*2]
jmp nf_xyp_shift
;----------------------------------------
ALIGN 4
nf5: ; Far shift from previous buffer
mov ax, [esi]
add esi, 2
nf_xyp_shift:
xor ebx, ebx
mov bl, ah
shl eax, 24
sar eax, 24-1
add eax, nfpk_ShiftY[ebx*4]
add eax, DiffBufPtrs
jmp nf_shift
;----------------------------------------
ALIGN 4
nf6: ; Far shift from current buffer
mov ax, [esi]
add esi, 2
jmp nf_xyc_shift
;----------------------------------------
ALIGN 4
nf_shift:
if 0 ;debug
mov ebx, 0
jmp nf_solid
endif
mov ebx, esi ; save esi
lea esi, [edi+eax]
mov edx, nf_width
REPEAT 7
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
mov eax, [esi+8]
mov [edi+8], eax
mov eax, [esi+12]
mov [edi+12], eax
add esi, edx
add edi, edx
ENDM
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
mov eax, [esi+8]
mov [edi+8], eax
mov eax, [esi+12]
mov [edi+12], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
mov esi, ebx ; restore esi
retn
;----------------------------------------
ALIGN 4
nf7: ; 8x8x1 (12 bytes)
test word ptr [esi], 08000h
jnz nf23
if 0 ;debug
add esi, 12
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov8
lea edx, byte ptr ds:nf7_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_11-nf7_11)], bl
mov [edx+(nf7_12-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_13-nf7_11)], bl
mov [edx+(nf7_14-nf7_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_21-nf7_11)], bl
mov [edx+(nf7_22-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_23-nf7_11)], bl
mov [edx+(nf7_24-nf7_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_31-nf7_11)], bl
mov [edx+(nf7_32-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_33-nf7_11)], bl
mov [edx+(nf7_34-nf7_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_41-nf7_11)], bl
mov [edx+(nf7_42-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_43-nf7_11)], bl
mov [edx+(nf7_44-nf7_11)], bh
lea edx, [edx+(nf7_51-nf7_11)]
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_51-nf7_51)], bl
mov [edx+(nf7_52-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_53-nf7_51)], bl
mov [edx+(nf7_54-nf7_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_61-nf7_51)], bl
mov [edx+(nf7_62-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_63-nf7_51)], bl
mov [edx+(nf7_64-nf7_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_71-nf7_51)], bl
mov [edx+(nf7_72-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_73-nf7_51)], bl
mov [edx+(nf7_74-nf7_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_81-nf7_51)], bl
mov [edx+(nf7_82-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_83-nf7_51)], bl
mov [edx+(nf7_84-nf7_51)], bh
push ebp
push esi
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
Trans16 cx, esi+2
shl ecx, 16
Trans16 cx, esi
mov esi,nf_width
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
jmp nf7_0 ; flush prefetch
ALIGN 4
nf7_0:
nf7_11: mov [ebp+0], ebx
nf7_12: mov [ebp+4], ebx
nf7_13: mov [ebp+8], ebx
nf7_14: mov [ebp+12], ebx
add edi, esi
nf7_21: mov [ebp+0], ebx
nf7_22: mov [ebp+4], ebx
nf7_23: mov [ebp+8], ebx
nf7_24: mov [ebp+12], ebx
add edi, esi
nf7_31: mov [ebp+0], ebx
nf7_32: mov [ebp+4], ebx
nf7_33: mov [ebp+8], ebx
nf7_34: mov [ebp+12], ebx
add edi, esi
nf7_41: mov [ebp+0], ebx
nf7_42: mov [ebp+4], ebx
nf7_43: mov [ebp+8], ebx
nf7_44: mov [ebp+12], ebx
add edi, esi
nf7_51: mov [ebp+0], ebx
nf7_52: mov [ebp+4], ebx
nf7_53: mov [ebp+8], ebx
nf7_54: mov [ebp+12], ebx
add edi, esi
nf7_61: mov [ebp+0], ebx
nf7_62: mov [ebp+4], ebx
nf7_63: mov [ebp+8], ebx
nf7_64: mov [ebp+12], ebx
add edi, esi
nf7_71: mov [ebp+0], ebx
nf7_72: mov [ebp+4], ebx
nf7_73: mov [ebp+8], ebx
nf7_74: mov [ebp+12], ebx
add edi, esi
nf7_81: mov [ebp+0], ebx
nf7_82: mov [ebp+4], ebx
nf7_83: mov [ebp+8], ebx
nf7_84: mov [ebp+12], ebx
pop esi
pop ebp
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf7+16
nf23: ; low 4x4x1 (6 bytes)
if 0 ;debug
add esi, 6
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov4l
lea edx, byte ptr ds:nf23_11+1
mov al, [esi+4]
and al, 0fH
mov ebx, [ecx+eax*4]
mov [edx+(nf23_11-nf23_11)], bl
mov [edx+(nf23_12-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_13-nf23_11)], bl
mov [edx+(nf23_14-nf23_11)], bh
mov al, [esi+4]
shr al, 4
mov ebx, [ecx+eax*4]
mov [edx+(nf23_31-nf23_11)], bl
mov [edx+(nf23_32-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_33-nf23_11)], bl
mov [edx+(nf23_34-nf23_11)], bh
mov al, [esi+5]
and al, 0fH
mov ebx, [ecx+eax*4]
mov [edx+(nf23_51-nf23_11)], bl
mov [edx+(nf23_52-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_53-nf23_11)], bl
mov [edx+(nf23_54-nf23_11)], bh
mov al, [esi+5]
shr al, 4
mov ebx, [ecx+eax*4]
mov [edx+(nf23_71-nf23_11)], bl
mov [edx+(nf23_72-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_73-nf23_11)], bl
mov [edx+(nf23_74-nf23_11)], bh
mov edx, nf_width
; load ebx,ecx with 00,11 color combinations
Trans16 cx, esi, 1
shrd ebx, ecx, 16
mov bx, cx
Trans16 cx, esi+2
shrd eax, ecx, 16
mov ax, cx
mov ecx, eax
jmp nf23_0 ; flush prefetch
ALIGN 4
nf23_0:
nf23_11:mov eax, ebx
mov [edi], eax
mov [edi+edx], eax
nf23_12:mov eax, ebx
mov [edi+4], eax
mov [edi+edx+4], eax
nf23_13:mov eax, ebx
mov [edi+8], eax
mov [edi+edx+8], eax
nf23_14:mov eax, ebx
mov [edi+12], eax
mov [edi+edx+12], eax
lea edi, [edi+edx*2]
nf23_31:mov eax, ebx
mov [edi], eax
mov [edi+edx], eax
nf23_32:mov eax, ebx
mov [edi+4], eax
mov [edi+edx+4], eax
nf23_33:mov eax, ebx
mov [edi+8], eax
mov [edi+edx+8], eax
nf23_34:mov eax, ebx
mov [edi+12], eax
mov [edi+edx+12], eax
lea edi, [edi+edx*2]
nf23_51:mov eax, ebx
mov [edi], eax
mov [edi+edx], eax
nf23_52:mov eax, ebx
mov [edi+4], eax
mov [edi+edx+4], eax
nf23_53:mov eax, ebx
mov [edi+8], eax
mov [edi+edx+8], eax
nf23_54:mov eax, ebx
mov [edi+12], eax
mov [edi+edx+12], eax
lea edi, [edi+edx*2]
nf23_71:mov eax, ebx
mov [edi], eax
mov [edi+edx], eax
nf23_72:mov eax, ebx
mov [edi+4], eax
mov [edi+edx+4], eax
nf23_73:mov eax, ebx
mov [edi+8], eax
mov [edi+edx+8], eax
nf23_74:mov eax, ebx
mov [edi+12], eax
mov [edi+edx+12], eax
add edi, edx
sub edi, nfpk_back_right
add esi, 6
retn
;----------------------------------------
ALIGN 4
nf8: ; 2x2 4x4x1 (24 bytes)
test word ptr [esi], 08000h
jnz nf24
if 0 ;debug
add esi, 24
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov8
lea edx, byte ptr ds:nf8_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_11-nf8_11)], bl
mov [edx+(nf8_12-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_13-nf8_11)], bl
mov [edx+(nf8_14-nf8_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_21-nf8_11)], bl
mov [edx+(nf8_22-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_23-nf8_11)], bl
mov [edx+(nf8_24-nf8_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_31-nf8_11)], bl
mov [edx+(nf8_32-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_33-nf8_11)], bl
mov [edx+(nf8_34-nf8_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_41-nf8_11)], bl
mov [edx+(nf8_42-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_43-nf8_11)], bl
mov [edx+(nf8_44-nf8_11)], bh
add edx, nf8_51-nf8_11
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_51-nf8_51)], bl
mov [edx+(nf8_52-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_53-nf8_51)], bl
mov [edx+(nf8_54-nf8_51)], bh
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_61-nf8_51)], bl
mov [edx+(nf8_62-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_63-nf8_51)], bl
mov [edx+(nf8_64-nf8_51)], bh
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_71-nf8_51)], bl
mov [edx+(nf8_72-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_73-nf8_51)], bl
mov [edx+(nf8_74-nf8_51)], bh
mov al, [esi+23]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_81-nf8_51)], bl
mov [edx+(nf8_82-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_83-nf8_51)], bl
mov [edx+(nf8_84-nf8_51)], bh
push ebp
push esi
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
Trans16 cx, esi+18+2
shl ecx, 16
Trans16 cx, esi+18
push ecx
Trans16 cx, esi+12+2
shl ecx, 16
Trans16 cx, esi+12
push ecx
Trans16 cx, esi+6+2
shl ecx, 16
Trans16 cx, esi+6
push ecx
Trans16 cx, esi+2
shl ecx, 16
Trans16 cx, esi
mov esi,nf_width
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
jmp nf8_0 ; flush prefetch
ALIGN 4
nf8_0:
nf8_11: mov [ebp+0], ebx
nf8_12: mov [ebp+4], ebx
add edi, esi
nf8_13: mov [ebp+0], ebx
nf8_14: mov [ebp+4], ebx
add edi, esi
nf8_21: mov [ebp+0], ebx
nf8_22: mov [ebp+4], ebx
add edi, esi
nf8_23: mov [ebp+0], ebx
nf8_24: mov [ebp+4], ebx
add edi, esi
pop ecx
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
nf8_31: mov [ebp+0], ebx
nf8_32: mov [ebp+4], ebx
add edi, esi
nf8_33: mov [ebp+0], ebx
nf8_34: mov [ebp+4], ebx
add edi, esi
nf8_41: mov [ebp+0], ebx
nf8_42: mov [ebp+4], ebx
add edi, esi
nf8_43: mov [ebp+0], ebx
nf8_44: mov [ebp+4], ebx
add edi, esi
lea eax, [esi*8-8]
sub edi, eax
pop ecx
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
nf8_51: mov [ebp+0], ebx
nf8_52: mov [ebp+4], ebx
add edi, esi
nf8_53: mov [ebp+0], ebx
nf8_54: mov [ebp+4], ebx
add edi, esi
nf8_61: mov [ebp+0], ebx
nf8_62: mov [ebp+4], ebx
add edi, esi
nf8_63: mov [ebp+0], ebx
nf8_64: mov [ebp+4], ebx
add edi, esi
pop ecx
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
nf8_71: mov [ebp+0], ebx
nf8_72: mov [ebp+4], ebx
add edi, esi
nf8_73: mov [ebp+0], ebx
nf8_74: mov [ebp+4], ebx
add edi, esi
nf8_81: mov [ebp+0], ebx
nf8_82: mov [ebp+4], ebx
add edi, esi
nf8_83: mov [ebp+0], ebx
nf8_84: mov [ebp+4], ebx
pop esi
pop ebp
add esi, 24
sub edi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+16
; nf24 -- decode one block encoded as two halves, each with two colours and
;         a four-byte bit pattern (16 input bytes in total).
;
; Input layout read through esi:
;   +0,+2    colour words of the first half   (+0 is passed to Trans16 with
;            the extra argument 1)
;   +4..+7   pattern bytes of the first half
;   +8,+10   colour words of the second half
;   +12..+15 pattern bytes of the second half
; If bit 15 of the word at [esi+8] is set, control goes to nf40 instead.
;
; On exit: esi has advanced by 16; edi has been moved by the row/column
; bookkeeping below and finally by -nfpk_back_right (defined outside this
; chunk).  eax, ebx, ecx, edx and the flags are clobbered; ebp and esi are
; saved on the stack while they are reused.
;
; NOTE: this routine is self-modifying.  The stores labelled nf24_11..nf24_84
; are templates: before they run, the byte at offset 1 of each one (the byte
; following the opcode) is overwritten with a byte fetched from nfhpk_mov8.
; As assembled they address [ebp+d] with ebx as source, but ebp holds pixel
; data when they execute, so the patched byte presumably re-targets them to
; [edi+d] and selects one of ebx/edx/ecx/ebp as the source -- confirm against
; the nfhpk_mov8 table, which is not visible here.  Do not change the
; encoding or order of the template instructions.
nf24: ; 2x1 4x8x1 (16 bytes)
test word ptr [esi+8], 08000h
jnz nf40
if 0 ;debug
add esi, 16
mov ebx, 0
jmp nf_solid
endif
; --- Patch phase ---------------------------------------------------------
; eax is cleared so al can serve as a zero-extended table index,
; ecx = base of the dword table, edx = address of the patch byte of the
; first template.  Each pattern byte selects one table dword whose four
; bytes (bl, bh, then bl, bh after the shift) patch four templates, i.e.
; two output rows of this half.
xor eax, eax
lea ecx, nfhpk_mov8
lea edx, byte ptr ds:nf24_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_11-nf24_11)], bl
mov [edx+(nf24_12-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_13-nf24_11)], bl
mov [edx+(nf24_14-nf24_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_21-nf24_11)], bl
mov [edx+(nf24_22-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_23-nf24_11)], bl
mov [edx+(nf24_24-nf24_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_31-nf24_11)], bl
mov [edx+(nf24_32-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_33-nf24_11)], bl
mov [edx+(nf24_34-nf24_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_41-nf24_11)], bl
mov [edx+(nf24_42-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_43-nf24_11)], bl
mov [edx+(nf24_44-nf24_11)], bh
; Re-base edx on the first template of the second half so the following
; displacements are relative to nf24_51.
add edx, nf24_51-nf24_11
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_51-nf24_51)], bl
mov [edx+(nf24_52-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_53-nf24_51)], bl
mov [edx+(nf24_54-nf24_51)], bh
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_61-nf24_51)], bl
mov [edx+(nf24_62-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_63-nf24_51)], bl
mov [edx+(nf24_64-nf24_51)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_71-nf24_51)], bl
mov [edx+(nf24_72-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_73-nf24_51)], bl
mov [edx+(nf24_74-nf24_51)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_81-nf24_51)], bl
mov [edx+(nf24_82-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_83-nf24_51)], bl
mov [edx+(nf24_84-nf24_51)], bh
; --- Colour setup --------------------------------------------------------
; ebp and esi are reused below, so both are preserved on the stack.
push ebp
push esi
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
; Trans16 is a macro defined outside this chunk; it is used here to fetch a
; colour word into cx.  The second half's pair (high word from +10, low word
; from +8) is built first and parked on the stack.
Trans16 cx, esi+8+2
shl ecx, 16
Trans16 cx, esi+8
push ecx
; First half's pair.  The word at [esi] is the one whose bit 15 routed the
; block here, and it is the only fetch given the extra argument 1 --
; presumably that argument strips the flag bit (confirm in the macro).
Trans16 cx, esi+2
shl ecx, 16
Trans16 cx, esi, 1
; From here on esi holds the row stride.
mov esi,nf_width
; With ecx = c1:c0 (high:low): edx = c0:c1, ebx = c0:c0, ebp = c1:c1.
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
jmp nf24_0 ; flush prefetch
ALIGN 4
; --- First half: eight rows of two patched dword stores each -------------
nf24_0:
nf24_11:mov [ebp+0], ebx
nf24_12:mov [ebp+4], ebx
add edi, esi
nf24_13:mov [ebp+0], ebx
nf24_14:mov [ebp+4], ebx
add edi, esi
nf24_21:mov [ebp+0], ebx
nf24_22:mov [ebp+4], ebx
add edi, esi
nf24_23:mov [ebp+0], ebx
nf24_24:mov [ebp+4], ebx
add edi, esi
nf24_31:mov [ebp+0], ebx
nf24_32:mov [ebp+4], ebx
add edi, esi
nf24_33:mov [ebp+0], ebx
nf24_34:mov [ebp+4], ebx
add edi, esi
nf24_41:mov [ebp+0], ebx
nf24_42:mov [ebp+4], ebx
add edi, esi
nf24_43:mov [ebp+0], ebx
nf24_44:mov [ebp+4], ebx
add edi, esi
; edi -= 8*nf_width - 8: back up the eight rows just written and step
; 8 bytes to the right for the second half.
lea eax, [esi*8-8]
sub edi, eax
; Rebuild the four register combinations from the second half's colours.
pop ecx
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
; --- Second half ---------------------------------------------------------
nf24_51:mov [ebp+0], ebx
nf24_52:mov [ebp+4], ebx
add edi, esi
nf24_53:mov [ebp+0], ebx
nf24_54:mov [ebp+4], ebx
add edi, esi
nf24_61:mov [ebp+0], ebx
nf24_62:mov [ebp+4], ebx
add edi, esi
nf24_63:mov [ebp+0], ebx
nf24_64:mov [ebp+4], ebx
add edi, esi
nf24_71:mov [ebp+0], ebx
nf24_72:mov [ebp+4], ebx
add edi, esi
nf24_73:mov [ebp+0], ebx
nf24_74:mov [ebp+4], ebx
add edi, esi
nf24_81:mov [ebp+0], ebx
nf24_82:mov [ebp+4], ebx
add edi, esi
nf24_83:mov [ebp+0], ebx
nf24_84:mov [ebp+4], ebx
; --- Epilogue: restore the source pointer and ebp, consume the 16 input
; bytes, undo the 8-byte right step, then apply nfpk_back_right.
pop esi
pop ebp
add esi, 16
sub edi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+32
; nf40 -- decode one block encoded as two halves, each with two colours and
;         a four-byte bit pattern (16 input bytes in total).  Unlike nf24,
;         each pattern byte here drives one full row of four dword stores,
;         and the second half continues directly below the first.
;
; Input layout read through esi:
;   +0,+2    colour words of the first half
;   +4..+7   pattern bytes of the first half (one per row)
;   +8,+10   colour words of the second half
;   +12..+15 pattern bytes of the second half (one per row)
; The words at +0 and +8 are fetched with Trans16's extra argument 1.
;
; On exit: esi has advanced by 16; edi has been advanced seven rows and then
; moved by -nfpk_back_right (defined outside this chunk).  eax, ebx, ecx,
; edx and the flags are clobbered; ebp and esi are saved on the stack while
; they are reused.
;
; NOTE: this routine is self-modifying.  The stores labelled nf40_11..nf40_84
; are templates whose byte at offset 1 (the byte following the opcode) is
; overwritten with a byte from nfhpk_mov8 before they run.  As assembled
; they address [ebp+d]; since ebp holds pixel data at that point, the patch
; presumably re-targets them to [edi+d] with one of ebx/edx/ecx/ebp as the
; source -- confirm against the table, which is not visible here.  Do not
; change the encoding or order of the template instructions.
nf40: ; 1x2 8x4x1 (16 bytes)
if 0 ;debug
add esi, 16
mov ebx, 0
jmp nf_solid
endif
; --- Patch phase ---------------------------------------------------------
; eax is cleared so al can serve as a zero-extended table index,
; ecx = table base, edx = address of the patch byte of the first template.
; Each pattern byte selects one table dword whose four bytes patch the four
; stores of one row.
xor eax, eax
lea ecx, nfhpk_mov8
lea edx, byte ptr ds:nf40_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_11-nf40_11)], bl
mov [edx+(nf40_12-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_13-nf40_11)], bl
mov [edx+(nf40_14-nf40_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_21-nf40_11)], bl
mov [edx+(nf40_22-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_23-nf40_11)], bl
mov [edx+(nf40_24-nf40_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_31-nf40_11)], bl
mov [edx+(nf40_32-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_33-nf40_11)], bl
mov [edx+(nf40_34-nf40_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_41-nf40_11)], bl
mov [edx+(nf40_42-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_43-nf40_11)], bl
mov [edx+(nf40_44-nf40_11)], bh
; Re-base edx on the first template of the second half.
add edx, nf40_51-nf40_11
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_51-nf40_51)], bl
mov [edx+(nf40_52-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_53-nf40_51)], bl
mov [edx+(nf40_54-nf40_51)], bh
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_61-nf40_51)], bl
mov [edx+(nf40_62-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_63-nf40_51)], bl
mov [edx+(nf40_64-nf40_51)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_71-nf40_51)], bl
mov [edx+(nf40_72-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_73-nf40_51)], bl
mov [edx+(nf40_74-nf40_51)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_81-nf40_51)], bl
mov [edx+(nf40_82-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_83-nf40_51)], bl
mov [edx+(nf40_84-nf40_51)], bh
; --- Colour setup --------------------------------------------------------
; ebp and esi are reused below, so both are preserved on the stack.
push ebp
push esi
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
; Trans16 is a macro defined outside this chunk; it is used here to fetch a
; colour word into cx.  The extra argument 1 is given for the words at +8
; and +0 -- presumably it strips bit 15 of those words (confirm in the
; macro).  The second half's pair is built first and parked on the stack.
Trans16 cx, esi+8+2
shl ecx, 16
Trans16 cx, esi+8, 1
push ecx
Trans16 cx, esi+2
shl ecx, 16
Trans16 cx, esi, 1
; From here on esi holds the row stride.
mov esi,nf_width
; With ecx = c1:c0 (high:low): edx = c0:c1, ebx = c0:c0, ebp = c1:c1.
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
jmp nf40_0 ; flush prefetch
ALIGN 4
; --- First half: four rows of four patched dword stores each -------------
nf40_0:
nf40_11:mov [ebp+0], ebx
nf40_12:mov [ebp+4], ebx
nf40_13:mov [ebp+8], ebx
nf40_14:mov [ebp+12], ebx
add edi, esi
nf40_21:mov [ebp+0], ebx
nf40_22:mov [ebp+4], ebx
nf40_23:mov [ebp+8], ebx
nf40_24:mov [ebp+12], ebx
add edi, esi
nf40_31:mov [ebp+0], ebx
nf40_32:mov [ebp+4], ebx
nf40_33:mov [ebp+8], ebx
nf40_34:mov [ebp+12], ebx
add edi, esi
nf40_41:mov [ebp+0], ebx
nf40_42:mov [ebp+4], ebx
nf40_43:mov [ebp+8], ebx
nf40_44:mov [ebp+12], ebx
add edi, esi
; Rebuild the four register combinations from the second half's colours.
pop ecx
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
; --- Second half: the next four rows -------------------------------------
nf40_51:mov [ebp+0], ebx
nf40_52:mov [ebp+4], ebx
nf40_53:mov [ebp+8], ebx
nf40_54:mov [ebp+12], ebx
add edi, esi
nf40_61:mov [ebp+0], ebx
nf40_62:mov [ebp+4], ebx
nf40_63:mov [ebp+8], ebx
nf40_64:mov [ebp+12], ebx
add edi, esi
nf40_71:mov [ebp+0], ebx
nf40_72:mov [ebp+4], ebx
nf40_73:mov [ebp+8], ebx
nf40_74:mov [ebp+12], ebx
add edi, esi
nf40_81:mov [ebp+0], ebx
nf40_82:mov [ebp+4], ebx
nf40_83:mov [ebp+8], ebx
nf40_84:mov [ebp+12], ebx
; --- Epilogue: restore the source pointer and ebp, consume the 16 input
; bytes, then apply nfpk_back_right (no column step was taken here, so
; there is no extra "sub edi, 8").
pop esi
pop ebp
add esi, 16
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
; nf9 -- decode one block that uses four colours and a 16-byte pattern
;        (24 input bytes in total), or dispatch to a variant.
;
; Dispatch: bit 15 of the word at [esi] set   -> nf41
;           bit 15 of the word at [esi+4] set -> nf25
;           otherwise the block is decoded here.
;
; Input layout read through esi:
;   +0,+2,+4,+6  four colour words (loaded into bx, dx, cx, bp)
;   +8..+23      pattern bytes, two per output row
;
; Output: eight rows; each row is four dword stores at [edi], [edi+4],
; [edi+8], [edi+12], with edi advanced by nf_width between rows.
;
; On exit: esi has advanced by 24; edi has been advanced seven rows and then
; moved by -nfpk_back_right (defined outside this chunk).  eax, ebx, ecx,
; edx and the flags are clobbered; ebp and esi are saved on the stack while
; they are reused.
;
; NOTE: this routine is self-modifying.  Every "mov ax, bx" labelled
; nf9_11..nf9_88 is a template whose byte at offset 2 is overwritten with a
; byte from nfhpk_mov4 before it runs (offset 2 rather than 1 because a
; 16-bit mov in 32-bit code carries an operand-size prefix ahead of the
; opcode).  The patched byte presumably selects which of bx/dx/cx/bp is
; copied into ax -- confirm against the table, which is not visible here.
; Do not change the encoding or order of the template instructions.
nf9: ; 8x8x2 (24 bytes)
test word ptr [esi], 08000h
jnz nf41
test word ptr [esi+4], 08000h
jnz nf25
if 0 ;debug
add esi, 24
mov ebx, 0
jmp nf_solid
endif
; --- Patch phase ---------------------------------------------------------
; eax is cleared so al can serve as a zero-extended table index,
; ecx = table base, edx = address of the patch byte of the first template.
; Each pattern byte selects one table dword covering four templates.
; Within each pair the bytes are applied bh-then-bl: the first mov of a pair
; is shifted into the high word of eax (the second pixel in memory), so it
; takes the second table byte, and the later mov takes the first.
xor eax, eax
lea ecx, nfhpk_mov4
lea edx, byte ptr ds:nf9_11+2
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_11-nf9_11)], bh
mov [edx+(nf9_12-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_13-nf9_11)], bh
mov [edx+(nf9_14-nf9_11)], bl
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_15-nf9_11)], bh
mov [edx+(nf9_16-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_17-nf9_11)], bh
mov [edx+(nf9_18-nf9_11)], bl
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_21-nf9_11)], bh
mov [edx+(nf9_22-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_23-nf9_11)], bh
mov [edx+(nf9_24-nf9_11)], bl
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_25-nf9_11)], bh
mov [edx+(nf9_26-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_27-nf9_11)], bh
mov [edx+(nf9_28-nf9_11)], bl
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_31-nf9_11)], bh
mov [edx+(nf9_32-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_33-nf9_11)], bh
mov [edx+(nf9_34-nf9_11)], bl
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_35-nf9_11)], bh
mov [edx+(nf9_36-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_37-nf9_11)], bh
mov [edx+(nf9_38-nf9_11)], bl
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_41-nf9_11)], bh
mov [edx+(nf9_42-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_43-nf9_11)], bh
mov [edx+(nf9_44-nf9_11)], bl
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_45-nf9_11)], bh
mov [edx+(nf9_46-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_47-nf9_11)], bh
mov [edx+(nf9_48-nf9_11)], bl
; Re-base edx on the first template of row five so the remaining
; displacements are relative to nf9_51.
lea edx, [edx+(nf9_51-nf9_11)]
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_51-nf9_51)], bh
mov [edx+(nf9_52-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_53-nf9_51)], bh
mov [edx+(nf9_54-nf9_51)], bl
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_55-nf9_51)], bh
mov [edx+(nf9_56-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_57-nf9_51)], bh
mov [edx+(nf9_58-nf9_51)], bl
mov al, [esi+18]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_61-nf9_51)], bh
mov [edx+(nf9_62-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_63-nf9_51)], bh
mov [edx+(nf9_64-nf9_51)], bl
mov al, [esi+19]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_65-nf9_51)], bh
mov [edx+(nf9_66-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_67-nf9_51)], bh
mov [edx+(nf9_68-nf9_51)], bl
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_71-nf9_51)], bh
mov [edx+(nf9_72-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_73-nf9_51)], bh
mov [edx+(nf9_74-nf9_51)], bl
mov al, [esi+21]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_75-nf9_51)], bh
mov [edx+(nf9_76-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_77-nf9_51)], bh
mov [edx+(nf9_78-nf9_51)], bl
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_81-nf9_51)], bh
mov [edx+(nf9_82-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_83-nf9_51)], bh
mov [edx+(nf9_84-nf9_51)], bl
mov al, [esi+23]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_85-nf9_51)], bh
mov [edx+(nf9_86-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_87-nf9_51)], bh
mov [edx+(nf9_88-nf9_51)], bl
; --- Colour setup --------------------------------------------------------
; ebp and esi are reused below, so both are preserved on the stack.
push ebp
push esi
; Load bx,dx,cx,bp with four colors
; (Trans16 is a macro defined outside this chunk.)
Trans16 bx, esi
Trans16 dx, esi+2
Trans16 cx, esi+4
Trans16 bp, esi+6
; From here on esi holds the row stride.
mov esi, nf_width
jmp nf9_0 ; flush prefetch
ALIGN 4
; --- Output: each pair of patched movs builds one dword (first mov ends up
; in the high word, second in the low word), which is then stored.
nf9_0:
; row 1
nf9_11: mov ax, bx
shl eax, 16
nf9_12: mov ax, bx
mov [edi], eax
nf9_13: mov ax, bx
shl eax, 16
nf9_14: mov ax, bx
mov [edi+4], eax
nf9_15: mov ax, bx
shl eax, 16
nf9_16: mov ax, bx
mov [edi+8], eax
nf9_17: mov ax, bx
shl eax, 16
nf9_18: mov ax, bx
mov [edi+12], eax
add edi, esi
; row 2
nf9_21: mov ax, bx
shl eax, 16
nf9_22: mov ax, bx
mov [edi], eax
nf9_23: mov ax, bx
shl eax, 16
nf9_24: mov ax, bx
mov [edi+4], eax
nf9_25: mov ax, bx
shl eax, 16
nf9_26: mov ax, bx
mov [edi+8], eax
nf9_27: mov ax, bx
shl eax, 16
nf9_28: mov ax, bx
mov [edi+12], eax
add edi, esi
; row 3
nf9_31: mov ax, bx
shl eax, 16
nf9_32: mov ax, bx
mov [edi], eax
nf9_33: mov ax, bx
shl eax, 16
nf9_34: mov ax, bx
mov [edi+4], eax
nf9_35: mov ax, bx
shl eax, 16
nf9_36: mov ax, bx
mov [edi+8], eax
nf9_37: mov ax, bx
shl eax, 16
nf9_38: mov ax, bx
mov [edi+12], eax
add edi, esi
; row 4
nf9_41: mov ax, bx
shl eax, 16
nf9_42: mov ax, bx
mov [edi], eax
nf9_43: mov ax, bx
shl eax, 16
nf9_44: mov ax, bx
mov [edi+4], eax
nf9_45: mov ax, bx
shl eax, 16
nf9_46: mov ax, bx
mov [edi+8], eax
nf9_47: mov ax, bx
shl eax, 16
nf9_48: mov ax, bx
mov [edi+12], eax
add edi, esi
; row 5
nf9_51: mov ax, bx
shl eax, 16
nf9_52: mov ax, bx
mov [edi], eax
nf9_53: mov ax, bx
shl eax, 16
nf9_54: mov ax, bx
mov [edi+4], eax
nf9_55: mov ax, bx
shl eax, 16
nf9_56: mov ax, bx
mov [edi+8], eax
nf9_57: mov ax, bx
shl eax, 16
nf9_58: mov ax, bx
mov [edi+12], eax
add edi, esi
; row 6
nf9_61: mov ax, bx
shl eax, 16
nf9_62: mov ax, bx
mov [edi], eax
nf9_63: mov ax, bx
shl eax, 16
nf9_64: mov ax, bx
mov [edi+4], eax
nf9_65: mov ax, bx
shl eax, 16
nf9_66: mov ax, bx
mov [edi+8], eax
nf9_67: mov ax, bx
shl eax, 16
nf9_68: mov ax, bx
mov [edi+12], eax
add edi, esi
; row 7
nf9_71: mov ax, bx
shl eax, 16
nf9_72: mov ax, bx
mov [edi], eax
nf9_73: mov ax, bx
shl eax, 16
nf9_74: mov ax, bx
mov [edi+4], eax
nf9_75: mov ax, bx
shl eax, 16
nf9_76: mov ax, bx
mov [edi+8], eax
nf9_77: mov ax, bx
shl eax, 16
nf9_78: mov ax, bx
mov [edi+12], eax
add edi, esi
; row 8 (edi is not advanced afterwards)
nf9_81: mov ax, bx
shl eax, 16
nf9_82: mov ax, bx
mov [edi], eax
nf9_83: mov ax, bx
shl eax, 16
nf9_84: mov ax, bx
mov [edi+4], eax
nf9_85: mov ax, bx
shl eax, 16
nf9_86: mov ax, bx
mov [edi+8], eax
nf9_87: mov ax, bx
shl eax, 16
nf9_88: mov ax, bx
mov [edi+12], eax
; --- Epilogue: restore the source pointer and ebp, consume the 24 input
; bytes, then apply nfpk_back_right.
pop esi
pop ebp
add esi, 24
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
;nf9+16
; nf25 -- decode one block that uses four colours and a four-byte pattern
;         (12 input bytes in total).  Every selected colour is written as a
;         full dword (the colour duplicated in both halves) to two
;         consecutive rows, so each pattern entry covers a 2x2 group of
;         16-bit pixels.
;
; Reached from nf9 when bit 15 of the word at [esi] is clear and bit 15 of
; the word at [esi+4] is set.
;
; Input layout read through esi:
;   +0,+2,+4,+6  four colour words (+4 is fetched with Trans16's extra
;                argument 1)
;   +8..+11      pattern bytes, one per pair of output rows
;
; On exit: esi has advanced by 12; edi has been advanced seven rows and then
; moved by -nfpk_back_right (defined outside this chunk).  eax, ebx, ecx,
; edx and the flags are clobbered; ebp and esi are saved on the stack while
; they are reused.
;
; NOTE: this routine is self-modifying.  Every "mov eax, ebx" labelled
; nf25_11..nf25_44 is a template whose byte at offset 1 (the byte following
; the opcode) is overwritten with a byte from nfhpk_mov4 before it runs;
; the patched byte presumably selects which of ebx/edx/ecx/ebp is copied
; into eax -- confirm against the table, which is not visible here.  Do not
; change the encoding or order of the template instructions.
nf25: ; low 4x4x2 (12 bytes)
if 0 ;debug
add esi, 12
mov ebx, 0
jmp nf_solid
endif
; --- Patch phase ---------------------------------------------------------
; eax is cleared so al can serve as a zero-extended table index,
; ecx = table base, edx = address of the patch byte of the first template.
; Each pattern byte selects one table dword whose four bytes patch the four
; templates of one row pair.
xor eax, eax
lea ecx, nfhpk_mov4
lea edx, byte ptr ds:nf25_11+1
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_11-nf25_11)], bl
mov [edx+(nf25_12-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_13-nf25_11)], bl
mov [edx+(nf25_14-nf25_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_21-nf25_11)], bl
mov [edx+(nf25_22-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_23-nf25_11)], bl
mov [edx+(nf25_24-nf25_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_31-nf25_11)], bl
mov [edx+(nf25_32-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_33-nf25_11)], bl
mov [edx+(nf25_34-nf25_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_41-nf25_11)], bl
mov [edx+(nf25_42-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_43-nf25_11)], bl
mov [edx+(nf25_44-nf25_11)], bh
; --- Colour setup --------------------------------------------------------
; ebp and esi are reused below, so both are preserved on the stack.
push ebp
push esi
; Load ebx,edx,ecx,ebp with four colors, duplicated in high order.
; Each colour is fetched into cx by Trans16 (macro defined outside this
; chunk); "shrd r, ecx, 16" moves cx into the high word of r and the
; following 16-bit mov fills the low word.  The third colour is staged in
; eax and parked on the stack because cx is still needed as the fetch
; register for the fourth.
Trans16 cx, esi
shrd ebx, ecx, 16
mov bx, cx
Trans16 cx, esi+2
shrd edx, ecx, 16
mov dx, cx
; The word at +4 is the one whose bit 15 routed the block here -- presumably
; the extra argument 1 strips that flag bit (confirm in the macro).
Trans16 cx, esi+4, 1
shrd eax, ecx, 16
mov ax, cx
push eax
Trans16 cx, esi+6
shrd ebp, ecx, 16
mov bp, cx
; ecx = third colour, duplicated.
pop ecx
; From here on esi holds the row stride.
mov esi, nf_width
jmp nf25_0 ; flush prefetch
ALIGN 4
; --- Output: each patched mov picks a dword that is stored to the current
; row and to the row below it; edi then advances by two rows.
nf25_0:
nf25_11:mov eax, ebx
mov [edi], eax
mov [edi+esi], eax
nf25_12:mov eax, ebx
mov [edi+4], eax
mov [edi+esi+4], eax
nf25_13:mov eax, ebx
mov [edi+8], eax
mov [edi+esi+8], eax
nf25_14:mov eax, ebx
mov [edi+12], eax
mov [edi+esi+12], eax
lea edi, [edi+esi*2]
nf25_21:mov eax, ebx
mov [edi], eax
mov [edi+esi], eax
nf25_22:mov eax, ebx
mov [edi+4], eax
mov [edi+esi+4], eax
nf25_23:mov eax, ebx
mov [edi+8], eax
mov [edi+esi+8], eax
nf25_24:mov eax, ebx
mov [edi+12], eax
mov [edi+esi+12], eax
lea edi, [edi+esi*2]
nf25_31:mov eax, ebx
mov [edi], eax
mov [edi+esi], eax
nf25_32:mov eax, ebx
mov [edi+4], eax
mov [edi+esi+4], eax
nf25_33:mov eax, ebx
mov [edi+8], eax
mov [edi+esi+8], eax
nf25_34:mov eax, ebx
mov [edi+12], eax
mov [edi+esi+12], eax
lea edi, [edi+esi*2]
nf25_41:mov eax, ebx
mov [edi], eax
mov [edi+esi], eax
nf25_42:mov eax, ebx
mov [edi+4], eax
mov [edi+esi+4], eax
nf25_43:mov eax, ebx
mov [edi+8], eax
mov [edi+esi+8], eax
nf25_44:mov eax, ebx
mov [edi+12], eax
mov [edi+esi+12], eax
; Step onto the last row written (seven rows below the starting row).
add edi, esi
; --- Epilogue: restore the source pointer and ebp, consume the 12 input
; bytes, then apply nfpk_back_right.
pop esi
pop ebp
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+32
; nf41 -- decode one block that uses four colours and an eight-byte pattern
;         (16 input bytes in total), or dispatch to nf57.  Each selected
;         colour is written as a full dword (the colour duplicated in both
;         halves), i.e. two identical 16-bit pixels side by side.
;
; Reached from nf9 when bit 15 of the word at [esi] is set.  If bit 15 of
; the word at [esi+4] is also set, control goes to nf57 instead.
;
; Input layout read through esi:
;   +0,+2,+4,+6  four colour words (+0 is fetched with Trans16's extra
;                argument 1)
;   +8..+15      pattern bytes, one per output row
;
; On exit: esi has advanced by 16; edi has been advanced seven rows and then
; moved by -nfpk_back_right (defined outside this chunk).  eax, ebx, ecx,
; edx and the flags are clobbered; ebp and esi are saved on the stack while
; they are reused.
;
; NOTE: this routine is self-modifying.  The stores labelled nf41_11..nf41_84
; are templates whose byte at offset 1 (the byte following the opcode) is
; overwritten with a byte from nfhpk_mov8 before they run.  As assembled
; they address [ebp+d]; since ebp holds pixel data at that point, the patch
; presumably re-targets them to [edi+d] with one of ebx/edx/ecx/ebp as the
; source -- confirm against the table, which is not visible here.  Do not
; change the encoding or order of the template instructions.
nf41: ; low 4x8x2 (16 bytes)
test word ptr [esi+4], 08000h
jnz nf57
if 0 ;debug
add esi, 16
mov ebx, 0
jmp nf_solid
endif
; --- Patch phase ---------------------------------------------------------
; eax is cleared so al can serve as a zero-extended table index,
; ecx = table base, edx = address of the patch byte of the first template.
; Each pattern byte selects one table dword whose four bytes patch the four
; stores of one row.
xor eax, eax
lea ecx, nfhpk_mov8
lea edx, byte ptr ds:nf41_11+1
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_11-nf41_11)], bl
mov [edx+(nf41_12-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_13-nf41_11)], bl
mov [edx+(nf41_14-nf41_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_21-nf41_11)], bl
mov [edx+(nf41_22-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_23-nf41_11)], bl
mov [edx+(nf41_24-nf41_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_31-nf41_11)], bl
mov [edx+(nf41_32-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_33-nf41_11)], bl
mov [edx+(nf41_34-nf41_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_41-nf41_11)], bl
mov [edx+(nf41_42-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_43-nf41_11)], bl
mov [edx+(nf41_44-nf41_11)], bh
; Re-base edx on the first template of row five so the remaining
; displacements are relative to nf41_51.
lea edx, [edx+(nf41_51-nf41_11)]
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_51-nf41_51)], bl
mov [edx+(nf41_52-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_53-nf41_51)], bl
mov [edx+(nf41_54-nf41_51)], bh
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_61-nf41_51)], bl
mov [edx+(nf41_62-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_63-nf41_51)], bl
mov [edx+(nf41_64-nf41_51)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_71-nf41_51)], bl
mov [edx+(nf41_72-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_73-nf41_51)], bl
mov [edx+(nf41_74-nf41_51)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_81-nf41_51)], bl
mov [edx+(nf41_82-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_83-nf41_51)], bl
mov [edx+(nf41_84-nf41_51)], bh
; --- Colour setup --------------------------------------------------------
; ebp and esi are reused below, so both are preserved on the stack.
push ebp
push esi
; Load ebx,edx,ecx,ebp with four colors, duplicated in high order.
; Each colour is fetched into cx by Trans16 (macro defined outside this
; chunk); "shrd r, ecx, 16" moves cx into the high word of r and the
; following 16-bit mov fills the low word.  The word at +0 is the one whose
; bit 15 routed the block here -- presumably the extra argument 1 strips
; that flag bit (confirm in the macro).
Trans16 cx, esi, 1
shrd ebx, ecx, 16
mov bx, cx
Trans16 cx, esi+2
shrd edx, ecx, 16
mov dx, cx
; The third colour is staged in eax and parked on the stack because cx is
; still needed as the fetch register for the fourth.
Trans16 cx, esi+4
shrd eax, ecx, 16
mov ax, cx
push eax
Trans16 cx, esi+6
shrd ebp, ecx, 16
mov bp, cx
; ecx = third colour, duplicated.
pop ecx
; From here on esi holds the row stride.
mov esi, nf_width
jmp nf41_0 ; flush prefetch
ALIGN 4
; --- Output: eight rows of four patched dword stores each ----------------
nf41_0:
nf41_11:mov [ebp+0], ebx
nf41_12:mov [ebp+4], ebx
nf41_13:mov [ebp+8], ebx
nf41_14:mov [ebp+12], ebx
add edi, esi
nf41_21:mov [ebp+0], ebx
nf41_22:mov [ebp+4], ebx
nf41_23:mov [ebp+8], ebx
nf41_24:mov [ebp+12], ebx
add edi, esi
nf41_31:mov [ebp+0], ebx
nf41_32:mov [ebp+4], ebx
nf41_33:mov [ebp+8], ebx
nf41_34:mov [ebp+12], ebx
add edi, esi
nf41_41:mov [ebp+0], ebx
nf41_42:mov [ebp+4], ebx
nf41_43:mov [ebp+8], ebx
nf41_44:mov [ebp+12], ebx
add edi, esi
nf41_51:mov [ebp+0], ebx
nf41_52:mov [ebp+4], ebx
nf41_53:mov [ebp+8], ebx
nf41_54:mov [ebp+12], ebx
add edi, esi
nf41_61:mov [ebp+0], ebx
nf41_62:mov [ebp+4], ebx
nf41_63:mov [ebp+8], ebx
nf41_64:mov [ebp+12], ebx
add edi, esi
nf41_71:mov [ebp+0], ebx
nf41_72:mov [ebp+4], ebx
nf41_73:mov [ebp+8], ebx
nf41_74:mov [ebp+12], ebx
add edi, esi
nf41_81:mov [ebp+0], ebx
nf41_82:mov [ebp+4], ebx
nf41_83:mov [ebp+8], ebx
nf41_84:mov [ebp+12], ebx
; --- Epilogue: restore the source pointer and ebp, consume the 16 input
; bytes, then apply nfpk_back_right.
pop esi
pop ebp
add esi, 16
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+48
; nf57 -- decode one block that uses four colours and an eight-byte pattern
;         (16 input bytes in total).  Each assembled dword holds two
;         independently selected 16-bit pixels and is written to two
;         consecutive rows.
;
; Reached from nf41 when bit 15 is set in both the word at [esi] and the
; word at [esi+4].
;
; Input layout read through esi:
;   +0,+2,+4,+6  four colour words (+0 and +4 are fetched with Trans16's
;                extra argument 1)
;   +8..+15      pattern bytes, two per pair of output rows
;
; On exit: esi has advanced by 16; edi has been advanced seven rows and then
; moved by -nfpk_back_right (defined outside this chunk).  eax, ebx, ecx,
; edx and the flags are clobbered; ebp and esi are saved on the stack while
; they are reused.
;
; NOTE: this routine is self-modifying.  Every "mov ax, bx" labelled
; nf57_11..nf57_48 is a template whose byte at offset 2 is overwritten with
; a byte from nfhpk_mov4 before it runs (offset 2 rather than 1 because a
; 16-bit mov in 32-bit code carries an operand-size prefix ahead of the
; opcode).  The patched byte presumably selects which of bx/dx/cx/bp is
; copied into ax -- confirm against the table, which is not visible here.
; Do not change the encoding or order of the template instructions.
nf57: ; low 8x4x2 (16 bytes)
if 0 ;debug
add esi, 16
mov ebx, 0
jmp nf_solid
endif
; --- Patch phase ---------------------------------------------------------
; eax is cleared so al can serve as a zero-extended table index,
; ecx = table base, edx = address of the patch byte of the first template.
; Each pattern byte selects one table dword covering four templates.
; Within each pair the bytes are applied bh-then-bl: the first mov of a pair
; is shifted into the high word of eax (the second pixel in memory), so it
; takes the second table byte, and the later mov takes the first.
xor eax, eax
lea ecx, nfhpk_mov4
lea edx, byte ptr ds:nf57_11+2
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_11-nf57_11)], bh
mov [edx+(nf57_12-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_13-nf57_11)], bh
mov [edx+(nf57_14-nf57_11)], bl
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_15-nf57_11)], bh
mov [edx+(nf57_16-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_17-nf57_11)], bh
mov [edx+(nf57_18-nf57_11)], bl
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_21-nf57_11)], bh
mov [edx+(nf57_22-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_23-nf57_11)], bh
mov [edx+(nf57_24-nf57_11)], bl
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_25-nf57_11)], bh
mov [edx+(nf57_26-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_27-nf57_11)], bh
mov [edx+(nf57_28-nf57_11)], bl
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_31-nf57_11)], bh
mov [edx+(nf57_32-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_33-nf57_11)], bh
mov [edx+(nf57_34-nf57_11)], bl
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_35-nf57_11)], bh
mov [edx+(nf57_36-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_37-nf57_11)], bh
mov [edx+(nf57_38-nf57_11)], bl
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_41-nf57_11)], bh
mov [edx+(nf57_42-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_43-nf57_11)], bh
mov [edx+(nf57_44-nf57_11)], bl
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_45-nf57_11)], bh
mov [edx+(nf57_46-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_47-nf57_11)], bh
mov [edx+(nf57_48-nf57_11)], bl
; --- Colour setup --------------------------------------------------------
; ebp and esi are reused below, so both are preserved on the stack.
push ebp
push esi
; Load bx,dx,cx,bp with four colors
; (Trans16 is a macro defined outside this chunk.  The words at +0 and +4
; are the ones whose bit 15 routed the block here -- presumably the extra
; argument 1 strips that flag bit; confirm in the macro.)
Trans16 bx, esi, 1
Trans16 dx, esi+2
Trans16 cx, esi+4, 1
Trans16 bp, esi+6
; From here on esi holds the row stride.
mov esi, nf_width
jmp nf57_0 ; flush prefetch
ALIGN 4
; --- Output: each pair of patched movs builds one dword (first mov ends up
; in the high word, second in the low word), which is stored to the current
; row and to the row below it; edi then advances by two rows.
nf57_0:
nf57_11:mov ax, bx
shl eax, 16
nf57_12:mov ax, bx
mov [edi], eax
mov [edi+esi], eax
nf57_13:mov ax, bx
shl eax, 16
nf57_14:mov ax, bx
mov [edi+4], eax
mov [edi+esi+4], eax
nf57_15:mov ax, bx
shl eax, 16
nf57_16:mov ax, bx
mov [edi+8], eax
mov [edi+esi+8], eax
nf57_17:mov ax, bx
shl eax, 16
nf57_18:mov ax, bx
mov [edi+12], eax
mov [edi+esi+12], eax
lea edi, [edi+esi*2]
nf57_21:mov ax, bx
shl eax, 16
nf57_22:mov ax, bx
mov [edi], eax
mov [edi+esi], eax
nf57_23:mov ax, bx
shl eax, 16
nf57_24:mov ax, bx
mov [edi+4], eax
mov [edi+esi+4], eax
nf57_25:mov ax, bx
shl eax, 16
nf57_26:mov ax, bx
mov [edi+8], eax
mov [edi+esi+8], eax
nf57_27:mov ax, bx
shl eax, 16
nf57_28:mov ax, bx
mov [edi+12], eax
mov [edi+esi+12], eax
lea edi, [edi+esi*2]
nf57_31:mov ax, bx
shl eax, 16
nf57_32:mov ax, bx
mov [edi], eax
mov [edi+esi], eax
nf57_33:mov ax, bx
shl eax, 16
nf57_34:mov ax, bx
mov [edi+4], eax
mov [edi+esi+4], eax
nf57_35:mov ax, bx
shl eax, 16
nf57_36:mov ax, bx
mov [edi+8], eax
mov [edi+esi+8], eax
nf57_37:mov ax, bx
shl eax, 16
nf57_38:mov ax, bx
mov [edi+12], eax
mov [edi+esi+12], eax
lea edi, [edi+esi*2]
nf57_41:mov ax, bx
shl eax, 16
nf57_42:mov ax, bx
mov [edi], eax
mov [edi+esi], eax
nf57_43:mov ax, bx
shl eax, 16
nf57_44:mov ax, bx
mov [edi+4], eax
mov [edi+esi+4], eax
nf57_45:mov ax, bx
shl eax, 16
nf57_46:mov ax, bx
mov [edi+8], eax
mov [edi+esi+8], eax
nf57_47:mov ax, bx
shl eax, 16
nf57_48:mov ax, bx
mov [edi+12], eax
mov [edi+esi+12], eax
; Step onto the last row written (seven rows below the starting row).
add edi, esi
; --- Epilogue: restore the source pointer and ebp, consume the 16 input
; bytes, then apply nfpk_back_right.
pop esi
pop ebp
add esi, 16
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
; nf10 -- decode one block stored as four sub-blocks, each with its own
;         four colours and four pattern bytes (48 input bytes in total),
;         or dispatch to nf26.
;
; Dispatch: bit 15 of the word at [esi] set -> nf26.
;
; Input layout read through esi (one 12-byte record per sub-block):
;   +0..+7   colours,  +8..+11  pattern bytes   (sub-block 1)
;   +12..+19 colours,  +20..+23 pattern bytes   (sub-block 2)
;   +24..+31 colours,  +32..+35 pattern bytes   (sub-block 3)
;   +36..+43 colours,  +44..+47 pattern bytes   (sub-block 4)
;
; Output order: sub-block 1 fills four rows at [edi]/[edi+4], sub-block 2
; the next four rows below it; edi is then moved back up eight rows and
; 8 bytes to the right, where sub-blocks 3 and 4 are written the same way.
;
; On exit: esi has advanced by 48; edi has been moved by the row/column
; bookkeeping below and finally by -nfpk_back_right (defined outside this
; chunk).  eax, ebx, ecx, edx and the flags are clobbered; ebp and esi are
; saved on the stack while they are reused.
;
; NOTE: this routine is self-modifying.  Every "mov ax, bx" labelled
; nf10_11..nf10_88 is a template whose byte at offset 2 is overwritten with
; a byte from nfhpk_mov4 before it runs (offset 2 rather than 1 because a
; 16-bit mov in 32-bit code carries an operand-size prefix ahead of the
; opcode).  The patched byte presumably selects which of bx/dx/cx/bp is
; copied into ax -- confirm against the table, which is not visible here.
; Do not change the encoding or order of the template instructions.
nf10: ; 2x2 4x4x2 (48 bytes)
test word ptr [esi], 08000h
jnz nf26
if 0 ;debug
add esi, 48
mov ebx, 0
jmp nf_solid
endif
; --- Patch phase ---------------------------------------------------------
; eax is cleared so al can serve as a zero-extended table index,
; ecx = table base, edx = address of the patch byte of the first template.
; Each pattern byte selects one table dword covering the four templates of
; one output row.  Within each pair the bytes are applied bh-then-bl: the
; first mov of a pair is shifted into the high word of eax (the second pixel
; in memory), so it takes the second table byte.
xor eax, eax
lea ecx, nfhpk_mov4
lea edx, byte ptr ds:nf10_11+2
; sub-block 1: pattern bytes +8..+11
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_11-nf10_11)], bh
mov [edx+(nf10_12-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_13-nf10_11)], bh
mov [edx+(nf10_14-nf10_11)], bl
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_15-nf10_11)], bh
mov [edx+(nf10_16-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_17-nf10_11)], bh
mov [edx+(nf10_18-nf10_11)], bl
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_21-nf10_11)], bh
mov [edx+(nf10_22-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_23-nf10_11)], bh
mov [edx+(nf10_24-nf10_11)], bl
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_25-nf10_11)], bh
mov [edx+(nf10_26-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_27-nf10_11)], bh
mov [edx+(nf10_28-nf10_11)], bl
; sub-block 2: pattern bytes +20..+23
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_31-nf10_11)], bh
mov [edx+(nf10_32-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_33-nf10_11)], bh
mov [edx+(nf10_34-nf10_11)], bl
mov al, [esi+21]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_35-nf10_11)], bh
mov [edx+(nf10_36-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_37-nf10_11)], bh
mov [edx+(nf10_38-nf10_11)], bl
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_41-nf10_11)], bh
mov [edx+(nf10_42-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_43-nf10_11)], bh
mov [edx+(nf10_44-nf10_11)], bl
mov al, [esi+23]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_45-nf10_11)], bh
mov [edx+(nf10_46-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_47-nf10_11)], bh
mov [edx+(nf10_48-nf10_11)], bl
; Re-base edx on the first template of sub-block 3 so the remaining
; displacements are relative to nf10_51.
lea edx, [edx+(nf10_51-nf10_11)]
; sub-block 3: pattern bytes +32..+35
mov al, [esi+32]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_51-nf10_51)], bh
mov [edx+(nf10_52-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_53-nf10_51)], bh
mov [edx+(nf10_54-nf10_51)], bl
mov al, [esi+33]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_55-nf10_51)], bh
mov [edx+(nf10_56-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_57-nf10_51)], bh
mov [edx+(nf10_58-nf10_51)], bl
mov al, [esi+34]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_61-nf10_51)], bh
mov [edx+(nf10_62-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_63-nf10_51)], bh
mov [edx+(nf10_64-nf10_51)], bl
mov al, [esi+35]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_65-nf10_51)], bh
mov [edx+(nf10_66-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_67-nf10_51)], bh
mov [edx+(nf10_68-nf10_51)], bl
; sub-block 4: pattern bytes +44..+47
mov al, [esi+44]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_71-nf10_51)], bh
mov [edx+(nf10_72-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_73-nf10_51)], bh
mov [edx+(nf10_74-nf10_51)], bl
mov al, [esi+45]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_75-nf10_51)], bh
mov [edx+(nf10_76-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_77-nf10_51)], bh
mov [edx+(nf10_78-nf10_51)], bl
mov al, [esi+46]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_81-nf10_51)], bh
mov [edx+(nf10_82-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_83-nf10_51)], bh
mov [edx+(nf10_84-nf10_51)], bl
mov al, [esi+47]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_85-nf10_51)], bh
mov [edx+(nf10_86-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_87-nf10_51)], bh
mov [edx+(nf10_88-nf10_51)], bl
; --- Colour setup for sub-block 1 ----------------------------------------
; ebp and esi are reused below, so both are preserved on the stack; the
; saved esi stays at [esp] and is re-read before each later sub-block.
push ebp
push esi
; Load bx,dx,cx,bp with four colors
; (Trans16 is a macro defined outside this chunk.)
Trans16 bx, esi
Trans16 dx, esi+2
Trans16 cx, esi+4
Trans16 bp, esi+6
; From here on esi holds the row stride.
mov esi, nf_width
jmp nf10_0 ; flush prefetch
ALIGN 4
; --- Sub-block 1: four rows, two dwords per row.  Each pair of patched
; movs builds one dword (first mov in the high word, second in the low).
nf10_0:
nf10_11:mov ax, bx
shl eax, 16
nf10_12:mov ax, bx
mov [edi], eax
nf10_13:mov ax, bx
shl eax, 16
nf10_14:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_15:mov ax, bx
shl eax, 16
nf10_16:mov ax, bx
mov [edi], eax
nf10_17:mov ax, bx
shl eax, 16
nf10_18:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_21:mov ax, bx
shl eax, 16
nf10_22:mov ax, bx
mov [edi], eax
nf10_23:mov ax, bx
shl eax, 16
nf10_24:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_25:mov ax, bx
shl eax, 16
nf10_26:mov ax, bx
mov [edi], eax
nf10_27:mov ax, bx
shl eax, 16
nf10_28:mov ax, bx
mov [edi+4], eax
add edi, esi
; --- Sub-block 2: reload the saved source pointer, fetch the colours at
; +12..+18, then restore esi to the row stride.
; Load bx,dx,cx,bp with four colors
mov esi, [esp]
Trans16 bx, esi+12
Trans16 dx, esi+14
Trans16 cx, esi+16
Trans16 bp, esi+18
mov esi, nf_width
nf10_31:mov ax, bx
shl eax, 16
nf10_32:mov ax, bx
mov [edi], eax
nf10_33:mov ax, bx
shl eax, 16
nf10_34:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_35:mov ax, bx
shl eax, 16
nf10_36:mov ax, bx
mov [edi], eax
nf10_37:mov ax, bx
shl eax, 16
nf10_38:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_41:mov ax, bx
shl eax, 16
nf10_42:mov ax, bx
mov [edi], eax
nf10_43:mov ax, bx
shl eax, 16
nf10_44:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_45:mov ax, bx
shl eax, 16
nf10_46:mov ax, bx
mov [edi], eax
nf10_47:mov ax, bx
shl eax, 16
nf10_48:mov ax, bx
mov [edi+4], eax
add edi, esi
; edi -= 8*nf_width - 8: back up the eight rows just written and step
; 8 bytes to the right for sub-blocks 3 and 4.
lea eax, [esi*8-8]
sub edi, eax
; --- Sub-block 3: colours at +24..+30.
; Load bx,dx,cx,bp with four colors
mov esi, [esp]
Trans16 bx, esi+24
Trans16 dx, esi+26
Trans16 cx, esi+28
Trans16 bp, esi+30
mov esi, nf_width
nf10_51:mov ax, bx
shl eax, 16
nf10_52:mov ax, bx
mov [edi], eax
nf10_53:mov ax, bx
shl eax, 16
nf10_54:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_55:mov ax, bx
shl eax, 16
nf10_56:mov ax, bx
mov [edi], eax
nf10_57:mov ax, bx
shl eax, 16
nf10_58:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_61:mov ax, bx
shl eax, 16
nf10_62:mov ax, bx
mov [edi], eax
nf10_63:mov ax, bx
shl eax, 16
nf10_64:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_65:mov ax, bx
shl eax, 16
nf10_66:mov ax, bx
mov [edi], eax
nf10_67:mov ax, bx
shl eax, 16
nf10_68:mov ax, bx
mov [edi+4], eax
add edi, esi
; --- Sub-block 4: colours at +36..+42.
; Load bx,dx,cx,bp with four colors
mov esi, [esp]
Trans16 bx, esi+36
Trans16 dx, esi+38
Trans16 cx, esi+40
Trans16 bp, esi+42
mov esi, nf_width
nf10_71:mov ax, bx
shl eax, 16
nf10_72:mov ax, bx
mov [edi], eax
nf10_73:mov ax, bx
shl eax, 16
nf10_74:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_75:mov ax, bx
shl eax, 16
nf10_76:mov ax, bx
mov [edi], eax
nf10_77:mov ax, bx
shl eax, 16
nf10_78:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_81:mov ax, bx
shl eax, 16
nf10_82:mov ax, bx
mov [edi], eax
nf10_83:mov ax, bx
shl eax, 16
nf10_84:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_85:mov ax, bx
shl eax, 16
nf10_86:mov ax, bx
mov [edi], eax
nf10_87:mov ax, bx
shl eax, 16
nf10_88:mov ax, bx
mov [edi+4], eax
; --- Epilogue: restore the source pointer and ebp, consume the 48 input
; bytes, undo the 8-byte right step, then apply nfpk_back_right.
pop esi
pop ebp
add esi, 48
sub edi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+16
; nf26 -- decode one block stored as two halves, each with its own four
;         colours and eight pattern bytes (32 input bytes in total), or
;         dispatch to nf42.
;
; Reached from nf10 when bit 15 of the word at [esi] is set.  If bit 15 of
; the word at [esi+16] is also set, control goes to nf42 instead.
;
; Input layout read through esi (one 16-byte record per half):
;   +0..+7   colours (+0 is fetched with Trans16's extra argument 1)
;   +8..+15  pattern bytes of the first half, one per output row
;   +16..+23 colours of the second half
;   +24..+31 pattern bytes of the second half, one per output row
;
; Output order: the first half fills eight rows at [edi]/[edi+4]; edi is
; then moved back up eight rows and 8 bytes to the right, where the second
; half is written the same way.
;
; On exit: esi has advanced by 32; edi has been moved by the row/column
; bookkeeping below and finally by -nfpk_back_right (defined outside this
; chunk).  eax, ebx, ecx, edx and the flags are clobbered; ebp and esi are
; saved on the stack while they are reused.
;
; NOTE: this routine is self-modifying.  Every "mov ax, bx" labelled
; nf26_11..nf26_88 is a template whose byte at offset 2 is overwritten with
; a byte from nfhpk_mov4 before it runs (offset 2 rather than 1 because a
; 16-bit mov in 32-bit code carries an operand-size prefix ahead of the
; opcode).  The patched byte presumably selects which of bx/dx/cx/bp is
; copied into ax -- confirm against the table, which is not visible here.
; Do not change the encoding or order of the template instructions.
nf26: ; 2x1 4x8x2 (32 bytes)
test word ptr [esi+16], 08000h
jnz nf42
if 0 ;debug
add esi, 32
mov ebx, 0
jmp nf_solid
endif
; --- Patch phase ---------------------------------------------------------
; eax is cleared so al can serve as a zero-extended table index,
; ecx = table base, edx = address of the patch byte of the first template.
; Each pattern byte selects one table dword covering the four templates of
; one output row.  Within each pair the bytes are applied bh-then-bl: the
; first mov of a pair is shifted into the high word of eax (the second pixel
; in memory), so it takes the second table byte.
xor eax, eax
lea ecx, nfhpk_mov4
lea edx, byte ptr ds:nf26_11+2
; first half: pattern bytes +8..+15
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_11-nf26_11)], bh
mov [edx+(nf26_12-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_13-nf26_11)], bh
mov [edx+(nf26_14-nf26_11)], bl
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_15-nf26_11)], bh
mov [edx+(nf26_16-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_17-nf26_11)], bh
mov [edx+(nf26_18-nf26_11)], bl
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_21-nf26_11)], bh
mov [edx+(nf26_22-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_23-nf26_11)], bh
mov [edx+(nf26_24-nf26_11)], bl
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_25-nf26_11)], bh
mov [edx+(nf26_26-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_27-nf26_11)], bh
mov [edx+(nf26_28-nf26_11)], bl
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_31-nf26_11)], bh
mov [edx+(nf26_32-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_33-nf26_11)], bh
mov [edx+(nf26_34-nf26_11)], bl
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_35-nf26_11)], bh
mov [edx+(nf26_36-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_37-nf26_11)], bh
mov [edx+(nf26_38-nf26_11)], bl
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_41-nf26_11)], bh
mov [edx+(nf26_42-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_43-nf26_11)], bh
mov [edx+(nf26_44-nf26_11)], bl
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_45-nf26_11)], bh
mov [edx+(nf26_46-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_47-nf26_11)], bh
mov [edx+(nf26_48-nf26_11)], bl
; Re-base edx on the first template of the second half so the remaining
; displacements are relative to nf26_51.
lea edx, [edx+(nf26_51-nf26_11)]
; second half: pattern bytes +24..+31
mov al, [esi+24]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_51-nf26_51)], bh
mov [edx+(nf26_52-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_53-nf26_51)], bh
mov [edx+(nf26_54-nf26_51)], bl
mov al, [esi+25]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_55-nf26_51)], bh
mov [edx+(nf26_56-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_57-nf26_51)], bh
mov [edx+(nf26_58-nf26_51)], bl
mov al, [esi+26]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_61-nf26_51)], bh
mov [edx+(nf26_62-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_63-nf26_51)], bh
mov [edx+(nf26_64-nf26_51)], bl
mov al, [esi+27]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_65-nf26_51)], bh
mov [edx+(nf26_66-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_67-nf26_51)], bh
mov [edx+(nf26_68-nf26_51)], bl
mov al, [esi+28]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_71-nf26_51)], bh
mov [edx+(nf26_72-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_73-nf26_51)], bh
mov [edx+(nf26_74-nf26_51)], bl
mov al, [esi+29]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_75-nf26_51)], bh
mov [edx+(nf26_76-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_77-nf26_51)], bh
mov [edx+(nf26_78-nf26_51)], bl
mov al, [esi+30]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_81-nf26_51)], bh
mov [edx+(nf26_82-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_83-nf26_51)], bh
mov [edx+(nf26_84-nf26_51)], bl
mov al, [esi+31]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_85-nf26_51)], bh
mov [edx+(nf26_86-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_87-nf26_51)], bh
mov [edx+(nf26_88-nf26_51)], bl
; --- Colour setup for the first half -------------------------------------
; ebp and esi are reused below, so both are preserved on the stack; the
; saved esi stays at [esp] and is re-read before the second half.
push ebp
push esi
; Load bx,dx,cx,bp with four colors
; (Trans16 is a macro defined outside this chunk.  The word at +0 is the
; one whose bit 15 routed the block here -- presumably the extra argument 1
; strips that flag bit; confirm in the macro.)
Trans16 bx, esi, 1
Trans16 dx, esi+2
Trans16 cx, esi+4
Trans16 bp, esi+6
; From here on esi holds the row stride.
mov esi, nf_width
jmp nf26_0 ; flush prefetch
ALIGN 4
; --- First half: eight rows, two dwords per row.  Each pair of patched
; movs builds one dword (first mov in the high word, second in the low).
nf26_0:
nf26_11:mov ax, bx
shl eax, 16
nf26_12:mov ax, bx
mov [edi], eax
nf26_13:mov ax, bx
shl eax, 16
nf26_14:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_15:mov ax, bx
shl eax, 16
nf26_16:mov ax, bx
mov [edi], eax
nf26_17:mov ax, bx
shl eax, 16
nf26_18:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_21:mov ax, bx
shl eax, 16
nf26_22:mov ax, bx
mov [edi], eax
nf26_23:mov ax, bx
shl eax, 16
nf26_24:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_25:mov ax, bx
shl eax, 16
nf26_26:mov ax, bx
mov [edi], eax
nf26_27:mov ax, bx
shl eax, 16
nf26_28:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_31:mov ax, bx
shl eax, 16
nf26_32:mov ax, bx
mov [edi], eax
nf26_33:mov ax, bx
shl eax, 16
nf26_34:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_35:mov ax, bx
shl eax, 16
nf26_36:mov ax, bx
mov [edi], eax
nf26_37:mov ax, bx
shl eax, 16
nf26_38:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_41:mov ax, bx
shl eax, 16
nf26_42:mov ax, bx
mov [edi], eax
nf26_43:mov ax, bx
shl eax, 16
nf26_44:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_45:mov ax, bx
shl eax, 16
nf26_46:mov ax, bx
mov [edi], eax
nf26_47:mov ax, bx
shl eax, 16
nf26_48:mov ax, bx
mov [edi+4], eax
add edi, esi
; edi -= 8*nf_width - 8: back up the eight rows just written and step
; 8 bytes to the right for the second half.
lea eax, [esi*8-8]
sub edi, eax
; --- Second half: reload the saved source pointer, fetch the colours at
; +16..+22, then restore esi to the row stride.
; Load bx,dx,cx,bp with four colors
mov esi, [esp]
Trans16 bx, esi+16
Trans16 dx, esi+18
Trans16 cx, esi+20
Trans16 bp, esi+22
mov esi, nf_width
nf26_51:mov ax, bx
shl eax, 16
nf26_52:mov ax, bx
mov [edi], eax
nf26_53:mov ax, bx
shl eax, 16
nf26_54:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_55:mov ax, bx
shl eax, 16
nf26_56:mov ax, bx
mov [edi], eax
nf26_57:mov ax, bx
shl eax, 16
nf26_58:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_61:mov ax, bx
shl eax, 16
nf26_62:mov ax, bx
mov [edi], eax
nf26_63:mov ax, bx
shl eax, 16
nf26_64:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_65:mov ax, bx
shl eax, 16
nf26_66:mov ax, bx
mov [edi], eax
nf26_67:mov ax, bx
shl eax, 16
nf26_68:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_71:mov ax, bx
shl eax, 16
nf26_72:mov ax, bx
mov [edi], eax
nf26_73:mov ax, bx
shl eax, 16
nf26_74:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_75:mov ax, bx
shl eax, 16
nf26_76:mov ax, bx
mov [edi], eax
nf26_77:mov ax, bx
shl eax, 16
nf26_78:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_81:mov ax, bx
shl eax, 16
nf26_82:mov ax, bx
mov [edi], eax
nf26_83:mov ax, bx
shl eax, 16
nf26_84:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_85:mov ax, bx
shl eax, 16
nf26_86:mov ax, bx
mov [edi], eax
nf26_87:mov ax, bx
shl eax, 16
nf26_88:mov ax, bx
mov [edi+4], eax
; --- Epilogue: restore the source pointer and ebp, consume the 32 input
; bytes, undo the 8-byte right step, then apply nfpk_back_right.
pop esi
pop ebp
add esi, 32
sub edi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+32
; nf42 -- hicolor "1x2 8x4x2" square: two stacked 8x4 halves, each with its
; own four colors and 2 bits of color selection per pixel.
; Stream layout as read below (32 bytes, all relative to esi):
;   +0..+7   four colors for rows 1-4      +8..+15  selector bytes, rows 1-4
;   +16..+23 four colors for rows 5-8      +24..+31 selector bytes, rows 5-8
; In:  esi = compressed data pointer, edi = destination of the square.
; Out: esi advanced by 32; edi moved back by nfpk_back_right.
; Writes eax, ebx, ecx, edx; ebp and esi are saved on the stack while they
; are borrowed as a color register and as the row stride.
; Technique: self-modifying code. Each selector byte indexes nfhpk_mov4 to
; fetch four bytes, which are stored at offset +2 of four labelled
; "mov ax, bx" instructions (presumably the ModRM byte, choosing which of
; bx/dx/cx/bp is the source -- confirm against the nfhpk_mov4 table).
nf42: ; 1x2 8x4x2 (32 bytes)
if 0 ;debug
add esi, 32
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov4
; edx -> byte to patch inside the first drawing instruction
lea edx, byte ptr ds:nf42_11+2
; --- Patch rows 1-4 (selector bytes esi+8 .. esi+15, two bytes per row) ---
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_11-nf42_11)], bh
mov [edx+(nf42_12-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_13-nf42_11)], bh
mov [edx+(nf42_14-nf42_11)], bl
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_15-nf42_11)], bh
mov [edx+(nf42_16-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_17-nf42_11)], bh
mov [edx+(nf42_18-nf42_11)], bl
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_21-nf42_11)], bh
mov [edx+(nf42_22-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_23-nf42_11)], bh
mov [edx+(nf42_24-nf42_11)], bl
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_25-nf42_11)], bh
mov [edx+(nf42_26-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_27-nf42_11)], bh
mov [edx+(nf42_28-nf42_11)], bl
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_31-nf42_11)], bh
mov [edx+(nf42_32-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_33-nf42_11)], bh
mov [edx+(nf42_34-nf42_11)], bl
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_35-nf42_11)], bh
mov [edx+(nf42_36-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_37-nf42_11)], bh
mov [edx+(nf42_38-nf42_11)], bl
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_41-nf42_11)], bh
mov [edx+(nf42_42-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_43-nf42_11)], bh
mov [edx+(nf42_44-nf42_11)], bl
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_45-nf42_11)], bh
mov [edx+(nf42_46-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_47-nf42_11)], bh
mov [edx+(nf42_48-nf42_11)], bl
; Re-base edx on nf42_51 so the remaining displacements stay small
lea edx, [edx+(nf42_51-nf42_11)]
; --- Patch rows 5-8 (selector bytes esi+24 .. esi+31) ---
mov al, [esi+24]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_51-nf42_51)], bh
mov [edx+(nf42_52-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_53-nf42_51)], bh
mov [edx+(nf42_54-nf42_51)], bl
mov al, [esi+25]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_55-nf42_51)], bh
mov [edx+(nf42_56-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_57-nf42_51)], bh
mov [edx+(nf42_58-nf42_51)], bl
mov al, [esi+26]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_61-nf42_51)], bh
mov [edx+(nf42_62-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_63-nf42_51)], bh
mov [edx+(nf42_64-nf42_51)], bl
mov al, [esi+27]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_65-nf42_51)], bh
mov [edx+(nf42_66-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_67-nf42_51)], bh
mov [edx+(nf42_68-nf42_51)], bl
mov al, [esi+28]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_71-nf42_51)], bh
mov [edx+(nf42_72-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_73-nf42_51)], bh
mov [edx+(nf42_74-nf42_51)], bl
mov al, [esi+29]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_75-nf42_51)], bh
mov [edx+(nf42_76-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_77-nf42_51)], bh
mov [edx+(nf42_78-nf42_51)], bl
mov al, [esi+30]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_81-nf42_51)], bh
mov [edx+(nf42_82-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_83-nf42_51)], bh
mov [edx+(nf42_84-nf42_51)], bl
mov al, [esi+31]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_85-nf42_51)], bh
mov [edx+(nf42_86-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_87-nf42_51)], bh
mov [edx+(nf42_88-nf42_51)], bl
; ebp is about to be used as a color register and esi as the row stride;
; the saved esi stays at [esp] so the second color set can be fetched later.
push ebp
push esi
; Load bx,dx,cx,bp with four colors
Trans16 bx, esi, 1
Trans16 dx, esi+2
Trans16 cx, esi+4
Trans16 bp, esi+6
mov esi, nf_width
jmp nf42_0 ; flush prefetch
ALIGN 4
nf42_0:
; --- Draw rows 1-4: each pair of patched moves builds one dword (two
; 16-bit pixels); four dwords (16 bytes) are stored per row. ---
nf42_11:mov ax, bx
shl eax, 16
nf42_12:mov ax, bx
mov [edi], eax
nf42_13:mov ax, bx
shl eax, 16
nf42_14:mov ax, bx
mov [edi+4], eax
nf42_15:mov ax, bx
shl eax, 16
nf42_16:mov ax, bx
mov [edi+8], eax
nf42_17:mov ax, bx
shl eax, 16
nf42_18:mov ax, bx
mov [edi+12], eax
add edi, esi
nf42_21:mov ax, bx
shl eax, 16
nf42_22:mov ax, bx
mov [edi], eax
nf42_23:mov ax, bx
shl eax, 16
nf42_24:mov ax, bx
mov [edi+4], eax
nf42_25:mov ax, bx
shl eax, 16
nf42_26:mov ax, bx
mov [edi+8], eax
nf42_27:mov ax, bx
shl eax, 16
nf42_28:mov ax, bx
mov [edi+12], eax
add edi, esi
nf42_31:mov ax, bx
shl eax, 16
nf42_32:mov ax, bx
mov [edi], eax
nf42_33:mov ax, bx
shl eax, 16
nf42_34:mov ax, bx
mov [edi+4], eax
nf42_35:mov ax, bx
shl eax, 16
nf42_36:mov ax, bx
mov [edi+8], eax
nf42_37:mov ax, bx
shl eax, 16
nf42_38:mov ax, bx
mov [edi+12], eax
add edi, esi
nf42_41:mov ax, bx
shl eax, 16
nf42_42:mov ax, bx
mov [edi], eax
nf42_43:mov ax, bx
shl eax, 16
nf42_44:mov ax, bx
mov [edi+4], eax
nf42_45:mov ax, bx
shl eax, 16
nf42_46:mov ax, bx
mov [edi+8], eax
nf42_47:mov ax, bx
shl eax, 16
nf42_48:mov ax, bx
mov [edi+12], eax
add edi, esi
; Load bx,dx,cx,bp with four colors
; (second color set, fetched through the stream pointer saved at [esp])
mov esi, [esp]
Trans16 bx, esi+16, 1
Trans16 dx, esi+18
Trans16 cx, esi+20
Trans16 bp, esi+22
mov esi, nf_width
; --- Draw rows 5-8 (no stride add after the last row) ---
nf42_51:mov ax, bx
shl eax, 16
nf42_52:mov ax, bx
mov [edi], eax
nf42_53:mov ax, bx
shl eax, 16
nf42_54:mov ax, bx
mov [edi+4], eax
nf42_55:mov ax, bx
shl eax, 16
nf42_56:mov ax, bx
mov [edi+8], eax
nf42_57:mov ax, bx
shl eax, 16
nf42_58:mov ax, bx
mov [edi+12], eax
add edi, esi
nf42_61:mov ax, bx
shl eax, 16
nf42_62:mov ax, bx
mov [edi], eax
nf42_63:mov ax, bx
shl eax, 16
nf42_64:mov ax, bx
mov [edi+4], eax
nf42_65:mov ax, bx
shl eax, 16
nf42_66:mov ax, bx
mov [edi+8], eax
nf42_67:mov ax, bx
shl eax, 16
nf42_68:mov ax, bx
mov [edi+12], eax
add edi, esi
nf42_71:mov ax, bx
shl eax, 16
nf42_72:mov ax, bx
mov [edi], eax
nf42_73:mov ax, bx
shl eax, 16
nf42_74:mov ax, bx
mov [edi+4], eax
nf42_75:mov ax, bx
shl eax, 16
nf42_76:mov ax, bx
mov [edi+8], eax
nf42_77:mov ax, bx
shl eax, 16
nf42_78:mov ax, bx
mov [edi+12], eax
add edi, esi
nf42_81:mov ax, bx
shl eax, 16
nf42_82:mov ax, bx
mov [edi], eax
nf42_83:mov ax, bx
shl eax, 16
nf42_84:mov ax, bx
mov [edi+4], eax
nf42_85:mov ax, bx
shl eax, 16
nf42_86:mov ax, bx
mov [edi+8], eax
nf42_87:mov ax, bx
shl eax, 16
nf42_88:mov ax, bx
mov [edi+12], eax
; Restore the stream pointer and frame pointer, then consume the 32 bytes.
pop esi
pop ebp
add esi, 32
; NOTE(review): nfpk_back_right is set outside this block; presumably this
; rewinds edi from the last row to the top of the next square -- confirm.
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
; nf11 -- hicolor raw square: 8 rows of 8 pixels, one 16-bit source word per
; pixel (128 bytes of stream data).
; In:  esi = compressed data pointer, edi = destination of the square.
; Out: esi advanced by 128; edi moved back by nfpk_back_right.
; Writes bx (via Trans16) and edx (row stride, loaded from nf_width).
; NOTE(review): Trans16 is a macro defined elsewhere in this file; it is
; assumed to load/translate the 16-bit pixel at the given address into the
; named register -- confirm its exact behaviour and clobbers.
nf11: ; 8x8x16 (128 bytes)
if 0 ;debug
add esi, 128
mov ebx, 0
jmp nf_solid
endif
mov edx, nf_width
; Trans16Blk idx: translate the eight words at idx..idx+14 and store them
; to the current destination row at [edi]..[edi+14].
Trans16Blk MACRO idx
Trans16 bx, idx
mov [edi], bx
Trans16 bx, idx+2
mov [edi+2], bx
Trans16 bx, idx+4
mov [edi+4], bx
Trans16 bx, idx+6
mov [edi+6], bx
Trans16 bx, idx+8
mov [edi+8], bx
Trans16 bx, idx+10
mov [edi+10], bx
Trans16 bx, idx+12
mov [edi+12], bx
Trans16 bx, idx+14
mov [edi+14], bx
ENDM
; One macro expansion per row; the source advances 16 bytes per row and the
; destination advances by the row stride in edx (not after the last row).
Trans16Blk esi ;0
add edi, edx
Trans16Blk esi+16 ;1
add edi, edx
Trans16Blk esi+32 ;2
add edi, edx
Trans16Blk esi+48 ;3
add edi, edx
Trans16Blk esi+64 ;4
add edi, edx
Trans16Blk esi+80 ;5
add edi, edx
Trans16Blk esi+96 ;6
add edi, edx
Trans16Blk esi+112 ;7
add esi, 128
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
; nf12 -- hicolor low-resolution square: 4x4 source pixels (16 words,
; 32 bytes), each expanded to a 2x2 block of destination pixels.
; In:  esi = compressed data pointer, edi = destination of the square.
; Out: esi advanced by 32; edi moved back by nfpk_back_right.
; Writes eax, ebx (via Trans16) and edx (row stride from nf_width).
; Per source pixel: "shrd eax, ebx, 16" moves bx into the high half of eax
; and "mov ax, bx" fills the low half, so eax holds the pixel twice; that
; dword is stored to the same column of two consecutive rows.
nf12: ; low 4x4x16 (32 bytes)
if 0 ;debug
add esi, 32
mov ebx, 0
jmp nf_solid
endif
mov edx, nf_width
; --- Source row 0 -> destination rows 0 and 1 ---
Trans16 bx, esi
shrd eax, ebx, 16
mov ax, bx
mov [edi], eax
mov [edi+edx], eax
Trans16 bx, esi+2
shrd eax, ebx, 16
mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
Trans16 bx, esi+4
shrd eax, ebx, 16
mov ax, bx
mov [edi+8], eax
mov [edi+edx+8], eax
Trans16 bx, esi+6
shrd eax, ebx, 16
mov ax, bx
mov [edi+12], eax
mov [edi+edx+12], eax
lea edi, [edi+edx*2]
; --- Source row 1 -> destination rows 2 and 3 ---
Trans16 bx, esi+8
shrd eax, ebx, 16
mov ax, bx
mov [edi], eax
mov [edi+edx], eax
Trans16 bx, esi+10
shrd eax, ebx, 16
mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
Trans16 bx, esi+12
shrd eax, ebx, 16
mov ax, bx
mov [edi+8], eax
mov [edi+edx+8], eax
Trans16 bx, esi+14
shrd eax, ebx, 16
mov ax, bx
mov [edi+12], eax
mov [edi+edx+12], eax
lea edi, [edi+edx*2]
; --- Source row 2 -> destination rows 4 and 5 ---
Trans16 bx, esi+16
shrd eax, ebx, 16
mov ax, bx
mov [edi], eax
mov [edi+edx], eax
Trans16 bx, esi+18
shrd eax, ebx, 16
mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
Trans16 bx, esi+20
shrd eax, ebx, 16
mov ax, bx
mov [edi+8], eax
mov [edi+edx+8], eax
Trans16 bx, esi+22
shrd eax, ebx, 16
mov ax, bx
mov [edi+12], eax
mov [edi+edx+12], eax
lea edi, [edi+edx*2]
; --- Source row 3 -> destination rows 6 and 7 ---
Trans16 bx, esi+24
shrd eax, ebx, 16
mov ax, bx
mov [edi], eax
mov [edi+edx], eax
Trans16 bx, esi+26
shrd eax, ebx, 16
mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
Trans16 bx, esi+28
shrd eax, ebx, 16
mov ax, bx
mov [edi+8], eax
mov [edi+edx+8], eax
Trans16 bx, esi+30
shrd eax, ebx, 16
mov ax, bx
mov [edi+12], eax
mov [edi+edx+12], eax
; edi is on row 6 here; step to row 7 so the common rewind by
; nfpk_back_right starts from the last row, as in the other handlers.
add edi, edx
sub edi, nfpk_back_right
add esi, 32
retn
;----------------------------------------
ALIGN 4
; nf13 -- hicolor square made of four solid 4x4 quadrants (four 16-bit
; colors, 8 bytes of stream data).
; In:  esi = compressed data pointer, edi = destination of the square.
; Out: esi advanced by 8; edi moved back by nfpk_back_right.
; Writes eax, ebx, ecx (via Trans16) and edx (row stride from nf_width).
; ebx holds the left-hand color duplicated into both halves of the dword,
; ecx the right-hand color likewise; each row stores ebx to bytes 0-7 and
; ecx to bytes 8-15.
nf13: ; 2x2 4x4x0 (8 bytes)
if 0 ;debug
add esi, 8
mov ebx, 0
jmp nf_solid
endif
mov edx, nf_width
; Colors at esi+0 / esi+2 fill the upper-left / upper-right quadrants.
Trans16 cx, esi
shrd ebx, ecx, 16
mov bx, cx
Trans16 cx, esi+2
shrd eax, ecx, 16
mov ax, cx
mov ecx, eax
; Rows 0 and 1
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ecx
mov [edi+12], ecx
mov [edi+edx], ebx
mov [edi+edx+4], ebx
mov [edi+edx+8], ecx
mov [edi+edx+12], ecx
lea edi, [edi+edx*2]
; Rows 2 and 3
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ecx
mov [edi+12], ecx
mov [edi+edx], ebx
mov [edi+edx+4], ebx
mov [edi+edx+8], ecx
mov [edi+edx+12], ecx
lea edi, [edi+edx*2]
; Colors at esi+4 / esi+6 fill the lower-left / lower-right quadrants.
Trans16 cx, esi+4
shrd ebx, ecx, 16
mov bx, cx
Trans16 cx, esi+6
shrd eax, ecx, 16
mov ax, cx
mov ecx, eax
; Rows 4 and 5
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ecx
mov [edi+12], ecx
mov [edi+edx], ebx
mov [edi+edx+4], ebx
mov [edi+edx+8], ecx
mov [edi+edx+12], ecx
lea edi, [edi+edx*2]
; Rows 6 and 7
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ecx
mov [edi+12], ecx
mov [edi+edx], ebx
mov [edi+edx+4], ebx
mov [edi+edx+8], ecx
mov [edi+edx+12], ecx
; edi is on row 6 here; step to row 7 before the common rewind.
add edi, edx
sub edi, nfpk_back_right
add esi, 8
retn
;----------------------------------------
ALIGN 4
; nf14 -- hicolor solid square: one 16-bit color (2 bytes of stream data)
; fills the whole 8x8 square.
; In:  esi = compressed data pointer, edi = destination of the square.
; Out: esi advanced by 2; edi moved back by nfpk_back_right.
; Writes ebx, ecx (via Trans16) and edx.
nf14: ; 8x8x0 (2 bytes)
Trans16 cx, esi
add esi, 2
; Duplicate the pixel into both halves of ebx.
shrd ebx, ecx, 16
mov bx, cx
; nf_solid -- shared fill tail, also the target of the "if 0 ;debug" jumps
; in the other handlers. In: ebx = dword stored four times per row.
nf_solid:
mov edx, nf_width
; All rows but the last: store 16 bytes, then step down one row.
REPEAT SHEIGHT-1
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ebx
mov [edi+12], ebx
add edi, edx
ENDM
; Last row: no stride add, so edi stays on it for the rewind below.
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ebx
mov [edi+12], ebx
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
; nf15 -- placeholder handler: returns immediately, leaving esi and edi
; untouched.
nf15: ; unused
retn
nfHPkDecomp ENDP
; Normal version
;
; nfPkDecomp -- entry point and opcode dispatch loop of the "normal"
; decompressor (its handlers advance edi by SWIDTH bytes per 8-pixel square).
; Args:  ops  = pointer to the opcode stream (two 4-bit opcodes per byte)
;        comp = pointer to the compressed data stream (kept in esi)
;        x, y, w, h = region parameters; only w and h are used in this
;                     visible part (w/2 opcode bytes per row, h rows).
; NOTE(review): NF_DECOMP_INIT is a macro defined elsewhere; it presumably
; initialises tbuf, new_row and DiffBufPtrs from x/y -- confirm.
; Handler contract as used here: esi = data pointer, edi = output pointer;
; every handler ends with "retn".
nfPkDecomp PROC USES ESI EDI EBX, \
ops:PTRBYTE, comp:PTRBYTE, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD
LOCAL tbuf: PTRBYTE
LOCAL new_row:DWORD
LOCAL DiffBufPtrs:DWORD
LOCAL nfpk_back_right: DWORD
LOCAL wcnt:DWORD
LOG_LABEL "StartPkDecomp"
.data
; Handler table indexed by 4-bit opcode.
nfpk_OpTbl label dword
dword offset nf0 ; Prev Same (0)
dword offset nf1 ; No change (and copied to screen) (0)
dword offset nf2 ; Near shift from older part of current buf (1)
dword offset nf3 ; Near shift from newer part of current buf (1)
dword offset nf4 ; Near shift from previous buffer (1)
dword offset nf5 ; Far shift from previous buffer (2)
dword offset nf6 ; Far shift from current buffer (2)
; [Or if COMPOPS, run of no changes (0)]
dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 8x4x2 (24 bytes)
dword offset nf11 ; 8x8x8 (64 bytes)
dword offset nf12 ; low 4x4x8 (16 bytes)
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
dword offset nf14 ; 8x8x0 (1 byte)
dword offset nf15 ; mix 8x8x0 (2 bytes)
.code
NF_DECOMP_INIT 0
; nfpk_back_right = nf_back_right - SWIDTH. Handlers subtract it from edi
; after their last row, which moves edi up (SHEIGHT-1) rows and right by
; SWIDTH bytes, i.e. to the top of the next square.
mov eax, nf_back_right
sub eax, SWIDTH
mov nfpk_back_right, eax
mov esi, comp
mov edi, tbuf
nf_StartRow:
; wcnt = number of opcode bytes (pairs of squares) in this row
mov eax, w
shr eax, 1
mov wcnt,eax
ALIGN 4
nf_NextPair:
dec wcnt
js nf_NextRow
; Fetch the next opcode byte and advance the ops pointer.
mov ebx, ops
mov al, [ebx]
inc ebx
mov ops, ebx
; ebx = high nibble (second opcode), eax = low nibble (first opcode)
xor ebx, ebx
mov bl, al
shr bl, 4
and eax, 0Fh
; Chain the two handlers through the stack: the first handler's "retn"
; enters the second handler, whose "retn" comes back to nf_NextPair.
push offset nf_NextPair
push nfpk_OpTbl[ebx*4]
jmp nfpk_OpTbl[eax*4]
nf_NextRow:
; Advance edi to the next row of squares and loop until h rows are done.
add edi, new_row
dec h
jnz nf_StartRow
LOG_LABEL "EndPkDecomp"
ret
;----------------------------------------
ALIGN 4
; nf0 -- copy the square found at source offset DiffBufPtrs relative to edi
; (no extra x/y shift is added), using the common copy routine nf_shift.
; NOTE(review): DiffBufPtrs is presumably the byte distance from the
; current buffer to the previous buffer -- confirm where it is set.
nf0: ; No change from previous buffer
mov eax, DiffBufPtrs
jmp nf_shift
; \ Diagonal blend
;0 1
; 00010101 1
; 00001313 2
; 20303101 3
; 02030313 4
; 23203031 5
; 02020333 6
; 23232333 7
;2 22023233 8
;
; nf0_d -- fill an 8x8 one-byte-per-pixel square with the fixed "diagonal
; blend" pattern drawn in the diagram preceding this label.
; In (not set up here): bl, bh, cl, ch = colors 0, 1, 2, 3 of the diagram;
;     edx = row stride; edi = destination of the square.
; Out: edi moved back by nfpk_back_right. Writes eax.
; Each row is built as two dwords in eax; the trailing comments give the
; four color indices of each dword from lowest to highest address.
; NOTE(review): this label is not referenced by the nfpk_OpTbl entries
; visible in this file section -- confirm whether it is still used.
nf0_d:
; 3412 (low to high)
;------
mov al, bl ; 0001 (1)
mov ah, bh
shl eax, 16
mov al, bl
mov ah, bl
mov [edi], eax
mov ah, bh ; 0101
mov [edi+4], eax
add edi, edx
mov ah, bl ; 0000 (2)
rol eax, 16
mov ah, bl
mov [edi], eax
mov al, bh ; 1313
mov ah, ch
shl eax, 16
mov al, bh
mov ah, ch
mov [edi+4], eax
add edi, edx
mov al, ch ; 2030 (3)
mov ah, bl
shl eax, 16
mov al, cl
mov ah, bl
mov [edi], eax
mov al, bl ; 3101
mov ah, bh
shl eax, 16
mov al, ch
mov ah, bh
mov [edi+4], eax
add edi, edx
mov al, bl ; 0203 (4)
mov ah, ch
shl eax, 16
mov al, bl
mov ah, cl
mov [edi], eax
mov al, bh ; 0313
mov ah, ch
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, cl ; 2320 (5)
mov ah, bl
shl eax, 16
mov al, cl
mov ah, ch
mov [edi], eax
mov al, ch ; 3031
mov ah, bh
shl eax, 16
mov al, ch
mov ah, bl
mov [edi+4], eax
add edi, edx
mov al, bl ; 0202 (6)
mov ah, cl
shl eax, 16
mov al, bl
mov ah, cl
mov [edi], eax
; NOTE(review): the next five instructions leave eax = ch,ch,bl,ch from
; lowest to highest byte, i.e. 3303, not the 0333 stated in the comment and
; in the diagram -- confirm which of the two is intended.
mov ah, ch ; 0333
shl eax, 16
mov al, ch
mov ah, ch
mov [edi+4], eax
add edi, edx
mov al, cl ; 2323 (7)
rol eax, 16
mov al, cl
mov [edi], eax
mov al, ch ; 2333
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, bl ; 2202 (8)
mov ah, cl
shl eax, 16
mov al, cl
mov ah, cl
mov [edi], eax
mov al, ch ; 3233
mov ah, ch
shl eax, 16
mov al, ch
mov ah, cl
mov [edi+4], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
; / RDiagonal blend
;0 1
; 01010111 1
; 20201111 2
; 01021313 3
; 20212131 4
; 02121323 5
; 22213131 6
; 22232323 7
;2 22323133 8
;
; nf0_r -- fill an 8x8 one-byte-per-pixel square with the fixed "reverse
; diagonal blend" pattern drawn in the diagram preceding this label.
; In (not set up here): bl, bh, cl, ch = colors 0, 1, 2, 3 of the diagram;
;     edx = row stride; edi = destination of the square.
; Out: edi moved back by nfpk_back_right. Writes eax.
; Each row is built as two dwords in eax; the trailing comments give the
; four color indices of each dword from lowest to highest address. Several
; rows reuse bytes left in eax by the previous dword (rol/ror), so the
; instruction order matters.
; NOTE(review): this label is not referenced by the nfpk_OpTbl entries
; visible in this file section -- confirm whether it is still used.
nf0_r:
; 3412 (low to high)
;------
mov al, bl ; 0101 (1)
mov ah, bh
shl eax, 16
mov al, bl
mov ah, bh
mov [edi], eax
mov al, bh ; 0111
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, cl ; 2020 (2)
mov ah, bl
shl eax, 16
mov al, cl
mov ah, bl
mov [edi], eax
mov al, bh ; 1111
mov ah, bh
shl eax, 16
mov al, bh
mov ah, bh
mov [edi+4], eax
add edi, edx
mov al, bl ; 0102 (3)
mov ah, cl
rol eax, 16
mov al, bl
mov [edi], eax
mov al, bh ; 1313
mov ah, ch
shl eax, 16
mov al, bh
mov ah, ch
mov [edi+4], eax
add edi, edx
mov al, cl ; 2021 (4)
mov ah, bh
shl eax, 16
mov al, cl
mov ah, bl
mov [edi], eax
mov al, ch ; 2131
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
ror eax, 8 ; 0212 (5)
mov al, bl
mov ah, cl
mov [edi], eax
mov al, cl ; 1323
mov ah, ch
shl eax, 16
mov al, bh
mov ah, ch
mov [edi+4], eax
add edi, edx
mov al, cl ; 2221 (6)
mov ah, bh
shl eax, 16
mov al, cl
mov ah, cl
mov [edi], eax
mov al, ch ; 3131
mov ah, bh
shl eax, 16
mov al, ch
mov ah, bh
mov [edi+4], eax
add edi, edx
mov al, cl ; 2223 (7)
mov ah, ch
shl eax, 16
mov al, cl
mov ah, cl
mov [edi], eax
mov ah, ch ; 2323
rol eax, 16
mov [edi+4], eax
add edi, edx
rol eax, 8 ; 2232 (8)
mov al, cl
mov [edi], eax
mov al, ch ; 3133
mov ah, ch
shl eax, 16
mov al, ch
mov ah, bh
mov [edi+4], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
; nf1 -- leave the square untouched: consume no stream data and just move
; the output pointer to the next square (SWIDTH bytes to the right).
nf1: ; No change (and copied to screen)
add edi, SWIDTH
retn
;----------------------------------------
ALIGN 4
; nf2 -- copy a square from elsewhere in the current buffer. One stream
; byte indexes the word table nfpk_ShiftP2; the word's low byte is a signed
; x offset and its high byte an index into the dword table nfpk_ShiftY.
; In:  esi = compressed data pointer, edi = destination of the square.
; Out: esi advanced by 1; continues in nf_shift with eax = source offset.
; NOTE(review): nfpk_ShiftY[n] presumably holds the byte offset of an n-row
; shift -- confirm against the table definition.
nf2: ; Near shift from older part of current buffer
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP2[eax*2]
; nf_xyc_shift -- shared tail (also entered from nf3).
; In: al = signed x offset, ah = nfpk_ShiftY index.
; Out: eax = sign-extended x + nfpk_ShiftY[ah]; writes ebx.
nf_xyc_shift:
xor ebx, ebx
mov bl, ah
; Sign-extend al to 32 bits (this discards ah, already copied to bl).
shl eax, 24
sar eax, 24
add eax, nfpk_ShiftY[ebx*4]
jmp nf_shift
;----------------------------------------
ALIGN 4
; nf3 -- same table lookup as nf2 (one stream byte -> nfpk_ShiftP2 word),
; but both the x byte and the y byte are negated before the common tail
; nf_xyc_shift turns them into a source offset.
; In:  esi = compressed data pointer. Out: esi advanced by 1.
nf3: ; Near shift from newer part of current buffer
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP2[eax*2]
neg al
neg ah
jmp nf_xyc_shift
;----------------------------------------
ALIGN 4
; nf4 -- one stream byte indexes the word table nfpk_ShiftP1 to obtain an
; (x, y) pair in al/ah, then joins nf_xyp_shift, which adds DiffBufPtrs to
; the resulting offset before copying.
; In:  esi = compressed data pointer. Out: esi advanced by 1.
nf4: ; Near shift from previous buffer
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP1[eax*2]
jmp nf_xyp_shift
;----------------------------------------
ALIGN 4
; nf5 -- the (x, y) pair is read directly from the stream: two bytes, low
; byte = signed x offset, high byte = nfpk_ShiftY index.
; In:  esi = compressed data pointer. Out: esi advanced by 2.
nf5: ; Far shift from previous buffer
mov ax, [esi]
add esi, 2
; nf_xyp_shift -- shared tail (also entered from nf4).
; In: al = signed x offset, ah = nfpk_ShiftY index.
; Out: eax = sign-extended x + nfpk_ShiftY[ah] + DiffBufPtrs; writes ebx.
; The upper half of eax need not be clean on entry: only al and ah are used.
nf_xyp_shift:
xor ebx, ebx
mov bl, ah
shl eax, 24
sar eax, 24
add eax, nfpk_ShiftY[ebx*4]
add eax, DiffBufPtrs
jmp nf_shift
;----------------------------------------
ALIGN 4
; nf6 -- skip a run of unchanged squares.
; In:  ebx = the high nibble k of the current opcode byte, as left by the
;      dispatch loop; [esp] = address of the high-nibble handler pushed by
;      the dispatch loop.
; Out: edi advanced over k+2 pairs of squares, wrapping to following rows
;      of squares as needed (wcnt and h are updated to match).
; Consumes no stream data. Writes eax, ebx.
nf6: ; Run of no changes (must only appear in first nibble opcodes)
; Next nibble k specifies 2k+4 squares with no changes
; Drop the pushed second-handler address so the final "retn" returns
; straight to nf_NextPair.
add esp, 4 ; Next nibble is not an opcode
add ebx, 2 ; (minimum of 4 squares)
ALIGN 4
nf6a: add edi, SWIDTH*2 ; Advance over two squares
dec ebx
jz nf6z ; Last pair of squares
dec wcnt ; Same row?
jns nf6a ; Yes
add edi, new_row ; Advance to next row
dec h ; Decrement row count (should never become zero here)
; wcnt = w/2 - 1: the pair skipped by the next pass through nf6a is
; already accounted for.
mov eax, w ; Reset wcnt
shr eax ,1
dec eax
mov wcnt, eax
jmp nf6a
nf6z: retn
;----------------------------------------
ALIGN 4
; nf_shift -- copy one 8x8 one-byte-per-pixel square.
; In:  eax = byte offset from the destination to the source (source is
;      edi+eax); edi = destination of the square.
; Out: edi moved to the next square (rewound by nfpk_back_right); esi is
;      preserved via ebx. Writes eax, ebx, edx.
nf_shift:
if 0 ;debug
mov eax, 0
mov ebx, eax
jmp nf_solid
endif
mov ebx, esi ; save esi
lea esi, [edi+eax]
mov edx, nf_width
; Rows 0-6: copy 8 bytes as two dwords, then step both pointers one row.
REPEAT 7
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
add esi, edx
add edi, edx
ENDM
; Row 7: copied without stepping, so edi stays on the last row.
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
; nfpk_back_right = nf_back_right - SWIDTH (set at the top of nfPkDecomp),
; so this moves edi up (SHEIGHT-1) rows and right by SWIDTH bytes.
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
mov esi, ebx ; restore esi
retn
;----------------------------------------
ALIGN 4
; nf7 -- two-color 8x8 square, one selector bit per pixel.
; Stream layout as read below (10 bytes): +0,+1 the two colors, +2..+9 one
; selector byte per row. If the first color byte is above the second
; (unsigned compare), the data is instead the 4-byte low-resolution form
; handled by nf23.
; In:  esi = compressed data pointer, edi = destination of the square.
; Out: esi advanced by 10; edi moved to the next square.
; Writes eax, ebx, ecx, edx; ebp and esi are saved on the stack while they
; are borrowed as a color register and as the row stride.
; Technique: self-modifying code. Each selector byte indexes nfpk_mov8 to
; fetch four bytes, which are stored at offset +2 of the four labelled
; "mov ax, bx" instructions of that row (presumably the ModRM byte, choosing
; which of bx/dx/cx/bp supplies the next two pixels -- confirm against the
; nfpk_mov8 table).
nf7: ; 8x8x1 (10 bytes)
mov ax, [esi]
cmp al, ah
ja nf23
if 0 ;debug
add esi, 10
mov eax, 0fefefefeH
mov ebx, eax
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov8
; edx -> byte to patch inside the first drawing instruction
lea edx, byte ptr ds:nf7_11+2
; --- Patch rows 1-4 (selector bytes esi+2 .. esi+5) ---
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_11-nf7_11)], bl
mov [edx+(nf7_12-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_13-nf7_11)], bl
mov [edx+(nf7_14-nf7_11)], bh
mov al, [esi+3]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_21-nf7_11)], bl
mov [edx+(nf7_22-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_23-nf7_11)], bl
mov [edx+(nf7_24-nf7_11)], bh
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_31-nf7_11)], bl
mov [edx+(nf7_32-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_33-nf7_11)], bl
mov [edx+(nf7_34-nf7_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_41-nf7_11)], bl
mov [edx+(nf7_42-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_43-nf7_11)], bl
mov [edx+(nf7_44-nf7_11)], bh
; Re-base edx on nf7_51 so the remaining displacements stay small
lea edx, [edx+(nf7_51-nf7_11)]
; --- Patch rows 5-8 (selector bytes esi+6 .. esi+9) ---
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_51-nf7_51)], bl
mov [edx+(nf7_52-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_53-nf7_51)], bl
mov [edx+(nf7_54-nf7_51)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_61-nf7_51)], bl
mov [edx+(nf7_62-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_63-nf7_51)], bl
mov [edx+(nf7_64-nf7_51)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_71-nf7_51)], bl
mov [edx+(nf7_72-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_73-nf7_51)], bl
mov [edx+(nf7_74-nf7_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_81-nf7_51)], bl
mov [edx+(nf7_82-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_83-nf7_51)], bl
mov [edx+(nf7_84-nf7_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
; With c0 = [esi] and c1 = [esi+1], as (low byte, high byte):
; bx = (c0,c0), dx = (c1,c0), cx = (c0,c1), bp = (c1,c1).
mov cx, [esi]
mov esi,nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf7_0 ; flush prefetch
ALIGN 4
nf7_0:
; --- Draw 8 rows: two patched moves build each dword (4 pixels), two
; dwords are stored per row. ---
nf7_11: mov ax, bx
shl eax, 16
nf7_12: mov ax, bx
mov [edi], eax
nf7_13: mov ax, bx
shl eax, 16
nf7_14: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_21: mov ax, bx
shl eax, 16
nf7_22: mov ax, bx
mov [edi], eax
nf7_23: mov ax, bx
shl eax, 16
nf7_24: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_31: mov ax, bx
shl eax, 16
nf7_32: mov ax, bx
mov [edi], eax
nf7_33: mov ax, bx
shl eax, 16
nf7_34: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_41: mov ax, bx
shl eax, 16
nf7_42: mov ax, bx
mov [edi], eax
nf7_43: mov ax, bx
shl eax, 16
nf7_44: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_51: mov ax, bx
shl eax, 16
nf7_52: mov ax, bx
mov [edi], eax
nf7_53: mov ax, bx
shl eax, 16
nf7_54: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_61: mov ax, bx
shl eax, 16
nf7_62: mov ax, bx
mov [edi], eax
nf7_63: mov ax, bx
shl eax, 16
nf7_64: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_71: mov ax, bx
shl eax, 16
nf7_72: mov ax, bx
mov [edi], eax
nf7_73: mov ax, bx
shl eax, 16
nf7_74: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_81: mov ax, bx
shl eax, 16
nf7_82: mov ax, bx
mov [edi], eax
nf7_83: mov ax, bx
shl eax, 16
nf7_84: mov ax, bx
mov [edi+4], eax
pop esi
pop ebp
add esi, 10
; nfpk_back_right = nf_back_right - SWIDTH (set at the top of nfPkDecomp),
; so this moves edi up (SHEIGHT-1) rows and right by SWIDTH bytes.
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf7+16
; nf23 -- low-resolution two-color square: a 4x4 grid of selector bits,
; each grid cell drawn as 2x2 destination pixels. Reached from nf7 when the
; first color byte is above the second.
; Stream layout as read below (4 bytes): +0,+1 the two colors, +2,+3 two
; selector bytes; each nibble (low nibble first) drives one pair of
; destination rows.
; In:  esi = compressed data pointer, edi = destination of the square.
; Out: esi advanced by 4; edi moved to the next square.
; Writes eax, ebx, ecx, edx.
; Technique: self-modifying code. Each nibble indexes nfpk_mov4l to fetch
; four bytes, which are stored at offset +2 of four labelled "mov ax, bx"
; instructions (presumably the ModRM byte, choosing bx or cx as the source
; -- confirm against the nfpk_mov4l table).
nf23: ; low 4x4x1 (4 bytes)
xor eax, eax
lea ecx, nfpk_mov4l
lea edx, byte ptr ds:nf23_11+2
; Low nibble of byte +2 -> rows 1-2
mov al, [esi+2]
and al, 0fH
mov ebx, [ecx+eax*4]
mov [edx+(nf23_11-nf23_11)], bl
mov [edx+(nf23_12-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_13-nf23_11)], bl
mov [edx+(nf23_14-nf23_11)], bh
; High nibble of byte +2 -> rows 3-4
mov al, [esi+2]
shr al, 4
mov ebx, [ecx+eax*4]
mov [edx+(nf23_31-nf23_11)], bl
mov [edx+(nf23_32-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_33-nf23_11)], bl
mov [edx+(nf23_34-nf23_11)], bh
; Low nibble of byte +3 -> rows 5-6
mov al, [esi+3]
and al, 0fH
mov ebx, [ecx+eax*4]
mov [edx+(nf23_51-nf23_11)], bl
mov [edx+(nf23_52-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_53-nf23_11)], bl
mov [edx+(nf23_54-nf23_11)], bh
; High nibble of byte +3 -> rows 7-8
mov al, [esi+3]
shr al, 4
mov ebx, [ecx+eax*4]
mov [edx+(nf23_71-nf23_11)], bl
mov [edx+(nf23_72-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_73-nf23_11)], bl
mov [edx+(nf23_74-nf23_11)], bh
; Patching is finished; edx now becomes the row stride.
mov edx, nf_width
; load bx,cx with 00,11 color combinations
; (bx = first color in both bytes, cx = second color in both bytes)
mov bx, [esi]
mov cl, bh
mov bh, bl
mov ch, cl
jmp nf23_0 ; flush prefetch
ALIGN 4
nf23_0:
; --- Draw: each dword (4 pixels) is stored to two consecutive rows ---
nf23_11:mov ax, bx
shl eax, 16
nf23_12:mov ax, bx
mov [edi], eax
mov [edi+edx], eax
nf23_13:mov ax, bx
shl eax, 16
nf23_14:mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
nf23_31:mov ax, bx
shl eax, 16
nf23_32:mov ax, bx
mov [edi], eax
mov [edi+edx], eax
nf23_33:mov ax, bx
shl eax, 16
nf23_34:mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
nf23_51:mov ax, bx
shl eax, 16
nf23_52:mov ax, bx
mov [edi], eax
mov [edi+edx], eax
nf23_53:mov ax, bx
shl eax, 16
nf23_54:mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
nf23_71:mov ax, bx
shl eax, 16
nf23_72:mov ax, bx
mov [edi], eax
mov [edi+edx], eax
nf23_73:mov ax, bx
shl eax, 16
nf23_74:mov ax, bx
mov [edi+4], eax
; Step to the last row first, then store its right half; this leaves edi
; on row 8 for the common rewind below.
add edi, edx
mov [edi+4], eax
sub edi, nfpk_back_right
add esi, 4
retn
;----------------------------------------
ALIGN 4
; nf8 -- square split into four 4x4 quadrants, each with its own two colors
; and one selector bit per pixel.
; Stream layout as read below (16 bytes), one 4-byte group per quadrant:
; two colors followed by two selector bytes. Groups at +0, +4, +8, +12 are
; drawn in the order upper-left, lower-left, upper-right, lower-right.
; If the first color byte is above the second (unsigned compare), the data
; is one of the 12-byte forms handled by nf24 / nf40.
; In:  esi = compressed data pointer, edi = destination of the square.
; Out: esi advanced by 16; edi moved to the next square.
; Writes eax, ebx, ecx, edx; ebp and esi are saved on the stack while they
; are borrowed as a color register and as the row stride.
; Technique: self-modifying code, as in nf7: nfpk_mov8[selector byte] gives
; four bytes that are stored at offset +2 of four labelled "mov ax, bx"
; instructions. Here each selector byte covers two 4-pixel rows.
nf8: ; 2x2 4x4x1 (16 bytes)
mov ax, [esi]
cmp al, ah
ja nf24
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf8_11+2
; --- Patch the left column: selector bytes +2,+3 (upper-left quadrant)
; and +6,+7 (lower-left quadrant) ---
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_11-nf8_11)], bl
mov [edx+(nf8_12-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_13-nf8_11)], bl
mov [edx+(nf8_14-nf8_11)], bh
mov al, [esi+3]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_21-nf8_11)], bl
mov [edx+(nf8_22-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_23-nf8_11)], bl
mov [edx+(nf8_24-nf8_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_31-nf8_11)], bl
mov [edx+(nf8_32-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_33-nf8_11)], bl
mov [edx+(nf8_34-nf8_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_41-nf8_11)], bl
mov [edx+(nf8_42-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_43-nf8_11)], bl
mov [edx+(nf8_44-nf8_11)], bh
; Re-base edx on nf8_51 so the remaining displacements stay small
add edx, nf8_51-nf8_11
; --- Patch the right column: selector bytes +10,+11 (upper-right
; quadrant) and +14,+15 (lower-right quadrant) ---
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_51-nf8_51)], bl
mov [edx+(nf8_52-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_53-nf8_51)], bl
mov [edx+(nf8_54-nf8_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_61-nf8_51)], bl
mov [edx+(nf8_62-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_63-nf8_51)], bl
mov [edx+(nf8_64-nf8_51)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_71-nf8_51)], bl
mov [edx+(nf8_72-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_73-nf8_51)], bl
mov [edx+(nf8_74-nf8_51)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_81-nf8_51)], bl
mov [edx+(nf8_82-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_83-nf8_51)], bl
mov [edx+(nf8_84-nf8_51)], bh
; The saved esi stays at [esp] so later color pairs can be fetched.
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf8_0 ; flush prefetch
ALIGN 4
nf8_0:
; --- Upper-left quadrant: rows 1-4, one dword (4 pixels) per row ---
nf8_11: mov ax, bx
shl eax, 16
nf8_12: mov ax, bx
mov [edi], eax
add edi, esi
nf8_13: mov ax, bx
shl eax, 16
nf8_14: mov ax, bx
mov [edi], eax
add edi, esi
nf8_21: mov ax, bx
shl eax, 16
nf8_22: mov ax, bx
mov [edi], eax
add edi, esi
nf8_23: mov ax, bx
shl eax, 16
nf8_24: mov ax, bx
mov [edi], eax
add edi, esi
; Colors for the lower-left quadrant (stream offset +4). Only the low
; 16 bits of ebp are used by the patched moves.
mov eax, [esp]
mov cx, [eax+4]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
; --- Lower-left quadrant: rows 5-8 ---
nf8_31: mov ax, bx
shl eax, 16
nf8_32: mov ax, bx
mov [edi], eax
add edi, esi
nf8_33: mov ax, bx
shl eax, 16
nf8_34: mov ax, bx
mov [edi], eax
add edi, esi
nf8_41: mov ax, bx
shl eax, 16
nf8_42: mov ax, bx
mov [edi], eax
add edi, esi
nf8_43: mov ax, bx
shl eax, 16
nf8_44: mov ax, bx
mov [edi], eax
add edi, esi
; edi is 8 rows below the start; go back up 8 rows and 4 bytes right.
lea eax, [esi*8-4]
sub edi, eax
; Colors for the upper-right quadrant (stream offset +8)
mov eax, [esp]
mov cx, [eax+8]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
; --- Upper-right quadrant: rows 1-4 ---
nf8_51: mov ax, bx
shl eax, 16
nf8_52: mov ax, bx
mov [edi], eax
add edi, esi
nf8_53: mov ax, bx
shl eax, 16
nf8_54: mov ax, bx
mov [edi], eax
add edi, esi
nf8_61: mov ax, bx
shl eax, 16
nf8_62: mov ax, bx
mov [edi], eax
add edi, esi
nf8_63: mov ax, bx
shl eax, 16
nf8_64: mov ax, bx
mov [edi], eax
add edi, esi
; Colors for the lower-right quadrant (stream offset +12)
mov eax, [esp]
mov cx, [eax+12]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
; --- Lower-right quadrant: rows 5-8 (no stride add after the last) ---
nf8_71: mov ax, bx
shl eax, 16
nf8_72: mov ax, bx
mov [edi], eax
add edi, esi
nf8_73: mov ax, bx
shl eax, 16
nf8_74: mov ax, bx
mov [edi], eax
add edi, esi
nf8_81: mov ax, bx
shl eax, 16
nf8_82: mov ax, bx
mov [edi], eax
add edi, esi
nf8_83: mov ax, bx
shl eax, 16
nf8_84: mov ax, bx
mov [edi], eax
pop esi
pop ebp
add esi, 16
; Undo the 4-byte column offset, then apply the common rewind
; (nfpk_back_right = nf_back_right - SWIDTH, set at the top of nfPkDecomp).
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+16
; nf24 -- square split into a left and a right 4x8 half, each with its own
; two colors and one selector bit per pixel. Reached from nf8.
; Stream layout as read below (12 bytes): +0,+1 colors and +2..+5 selector
; bytes for the left half; +6,+7 colors and +8..+11 selector bytes for the
; right half. If the byte at +6 is above the byte at +7 (unsigned compare),
; the data is instead the top/bottom form handled by nf40.
; In:  esi = compressed data pointer, edi = destination of the square.
; Out: esi advanced by 12; edi moved to the next square.
; Writes eax, ebx, ecx, edx; ebp and esi are saved on the stack while they
; are borrowed as a color register and as the row stride.
; Technique: self-modifying code, as in nf7: nfpk_mov8[selector byte] gives
; four bytes that are stored at offset +2 of four labelled "mov ax, bx"
; instructions. Here each selector byte covers two 4-pixel rows.
nf24: ; 2x1 4x8x1 (12 bytes)
mov ax, [esi+6]
cmp al, ah
ja nf40
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf24_11+2
; --- Patch the left half (selector bytes esi+2 .. esi+5) ---
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_11-nf24_11)], bl
mov [edx+(nf24_12-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_13-nf24_11)], bl
mov [edx+(nf24_14-nf24_11)], bh
mov al, [esi+3]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_21-nf24_11)], bl
mov [edx+(nf24_22-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_23-nf24_11)], bl
mov [edx+(nf24_24-nf24_11)], bh
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_31-nf24_11)], bl
mov [edx+(nf24_32-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_33-nf24_11)], bl
mov [edx+(nf24_34-nf24_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_41-nf24_11)], bl
mov [edx+(nf24_42-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_43-nf24_11)], bl
mov [edx+(nf24_44-nf24_11)], bh
; Re-base edx on nf24_51 so the remaining displacements stay small
add edx, nf24_51-nf24_11
; --- Patch the right half (selector bytes esi+8 .. esi+11) ---
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_51-nf24_51)], bl
mov [edx+(nf24_52-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_53-nf24_51)], bl
mov [edx+(nf24_54-nf24_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_61-nf24_51)], bl
mov [edx+(nf24_62-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_63-nf24_51)], bl
mov [edx+(nf24_64-nf24_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_71-nf24_51)], bl
mov [edx+(nf24_72-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_73-nf24_51)], bl
mov [edx+(nf24_74-nf24_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_81-nf24_51)], bl
mov [edx+(nf24_82-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_83-nf24_51)], bl
mov [edx+(nf24_84-nf24_51)], bh
; The saved esi stays at [esp] so the second color pair can be fetched.
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf24_0 ; flush prefetch
ALIGN 4
nf24_0:
; --- Left half: 8 rows, one dword (4 pixels) per row ---
nf24_11:mov ax, bx
shl eax, 16
nf24_12:mov ax, bx
mov [edi], eax
add edi, esi
nf24_13:mov ax, bx
shl eax, 16
nf24_14:mov ax, bx
mov [edi], eax
add edi, esi
nf24_21:mov ax, bx
shl eax, 16
nf24_22:mov ax, bx
mov [edi], eax
add edi, esi
nf24_23:mov ax, bx
shl eax, 16
nf24_24:mov ax, bx
mov [edi], eax
add edi, esi
nf24_31:mov ax, bx
shl eax, 16
nf24_32:mov ax, bx
mov [edi], eax
add edi, esi
nf24_33:mov ax, bx
shl eax, 16
nf24_34:mov ax, bx
mov [edi], eax
add edi, esi
nf24_41:mov ax, bx
shl eax, 16
nf24_42:mov ax, bx
mov [edi], eax
add edi, esi
nf24_43:mov ax, bx
shl eax, 16
nf24_44:mov ax, bx
mov [edi], eax
add edi, esi
; edi is 8 rows below the start; go back up 8 rows and 4 bytes right.
lea eax, [esi*8-4]
sub edi, eax
; Colors for the right half (stream offset +6). Only the low 16 bits of
; ebp are used by the patched moves.
mov eax, [esp]
mov cx, [eax+6]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
; --- Right half: 8 rows (no stride add after the last) ---
nf24_51:mov ax, bx
shl eax, 16
nf24_52:mov ax, bx
mov [edi], eax
add edi, esi
nf24_53:mov ax, bx
shl eax, 16
nf24_54:mov ax, bx
mov [edi], eax
add edi, esi
nf24_61:mov ax, bx
shl eax, 16
nf24_62:mov ax, bx
mov [edi], eax
add edi, esi
nf24_63:mov ax, bx
shl eax, 16
nf24_64:mov ax, bx
mov [edi], eax
add edi, esi
nf24_71:mov ax, bx
shl eax, 16
nf24_72:mov ax, bx
mov [edi], eax
add edi, esi
nf24_73:mov ax, bx
shl eax, 16
nf24_74:mov ax, bx
mov [edi], eax
add edi, esi
nf24_81:mov ax, bx
shl eax, 16
nf24_82:mov ax, bx
mov [edi], eax
add edi, esi
nf24_83:mov ax, bx
shl eax, 16
nf24_84:mov ax, bx
mov [edi], eax
pop esi
pop ebp
add esi, 12
; Undo the 4-byte column offset, then apply the common rewind
; (nfpk_back_right = nf_back_right - SWIDTH, set at the top of nfPkDecomp).
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+32
; nf40 -- square split into a top and a bottom 8x4 half, each with its own
; two colors and one selector bit per pixel. Reached from nf24.
; Stream layout as read below (12 bytes): +0,+1 colors and +2..+5 selector
; bytes (one per row) for the top half; +6,+7 colors and +8..+11 selector
; bytes for the bottom half.
; In:  esi = compressed data pointer, edi = destination of the square.
; Out: esi advanced by 12; edi moved to the next square.
; Writes eax, ebx, ecx, edx; ebp and esi are saved on the stack while they
; are borrowed as a color register and as the row stride.
; Technique: self-modifying code, as in nf7: nfpk_mov8[selector byte] gives
; four bytes that are stored at offset +2 of the four labelled "mov ax, bx"
; instructions of that row.
nf40: ; 1x2 8x4x1 (12 bytes)
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf40_11+2
; --- Patch rows 1-4 (selector bytes esi+2 .. esi+5) ---
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_11-nf40_11)], bl
mov [edx+(nf40_12-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_13-nf40_11)], bl
mov [edx+(nf40_14-nf40_11)], bh
mov al, [esi+3]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_21-nf40_11)], bl
mov [edx+(nf40_22-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_23-nf40_11)], bl
mov [edx+(nf40_24-nf40_11)], bh
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_31-nf40_11)], bl
mov [edx+(nf40_32-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_33-nf40_11)], bl
mov [edx+(nf40_34-nf40_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_41-nf40_11)], bl
mov [edx+(nf40_42-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_43-nf40_11)], bl
mov [edx+(nf40_44-nf40_11)], bh
; Re-base edx on nf40_51 so the remaining displacements stay small
add edx, nf40_51-nf40_11
; --- Patch rows 5-8 (selector bytes esi+8 .. esi+11) ---
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_51-nf40_51)], bl
mov [edx+(nf40_52-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_53-nf40_51)], bl
mov [edx+(nf40_54-nf40_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_61-nf40_51)], bl
mov [edx+(nf40_62-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_63-nf40_51)], bl
mov [edx+(nf40_64-nf40_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_71-nf40_51)], bl
mov [edx+(nf40_72-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_73-nf40_51)], bl
mov [edx+(nf40_74-nf40_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_81-nf40_51)], bl
mov [edx+(nf40_82-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_83-nf40_51)], bl
mov [edx+(nf40_84-nf40_51)], bh
; The saved esi stays at [esp] so the second color pair can be fetched.
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf40_0 ; flush prefetch
ALIGN 4
nf40_0:
; --- Top half: rows 1-4, two dwords (8 pixels) per row ---
nf40_11:mov ax, bx
shl eax, 16
nf40_12:mov ax, bx
mov [edi], eax
nf40_13:mov ax, bx
shl eax, 16
nf40_14:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_21:mov ax, bx
shl eax, 16
nf40_22:mov ax, bx
mov [edi], eax
nf40_23:mov ax, bx
shl eax, 16
nf40_24:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_31:mov ax, bx
shl eax, 16
nf40_32:mov ax, bx
mov [edi], eax
nf40_33:mov ax, bx
shl eax, 16
nf40_34:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_41:mov ax, bx
shl eax, 16
nf40_42:mov ax, bx
mov [edi], eax
nf40_43:mov ax, bx
shl eax, 16
nf40_44:mov ax, bx
mov [edi+4], eax
add edi, esi
; Colors for the bottom half (stream offset +6). Only the low 16 bits of
; ebp are used by the patched moves.
mov eax, [esp]
mov cx, [eax+6]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
; --- Bottom half: rows 5-8 (no stride add after the last) ---
nf40_51:mov ax, bx
shl eax, 16
nf40_52:mov ax, bx
mov [edi], eax
nf40_53:mov ax, bx
shl eax, 16
nf40_54:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_61:mov ax, bx
shl eax, 16
nf40_62:mov ax, bx
mov [edi], eax
nf40_63:mov ax, bx
shl eax, 16
nf40_64:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_71:mov ax, bx
shl eax, 16
nf40_72:mov ax, bx
mov [edi], eax
nf40_73:mov ax, bx
shl eax, 16
nf40_74:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_81:mov ax, bx
shl eax, 16
nf40_82:mov ax, bx
mov [edi], eax
nf40_83:mov ax, bx
shl eax, 16
nf40_84:mov ax, bx
mov [edi+4], eax
pop esi
pop ebp
add esi, 12
; nfpk_back_right = nf_back_right - SWIDTH (set at the top of nfPkDecomp),
; so this moves edi up (SHEIGHT-1) rows and right by SWIDTH bytes.
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
nf9: ; 8x8x2 (20 bytes)
; 8x8 block drawn from four colors, one 2-bit selector per pixel.
;
; The first four input bytes (the colors) also choose the sub-format,
; using unsigned comparisons:
;   byte0 > byte1            -> nf41 (which itself may continue to nf57)
;   else byte2 > byte3       -> nf25
;   otherwise                -> the full-resolution form handled here.
;
; Input bytes read relative to esi (full-resolution form):
;   +0..+3   four colors (loaded into bl, bh, cl, ch)
;   +4..+19  sixteen mask bytes, two per row
; Output: two dwords per row at [edi] and [edi+4]; edi steps by nf_width.
; On exit: esi += 20, edi has been moved back by nfpk_back_right.
; Uses eax, ebx, ecx, edx as scratch.
;
; Method (self-modifying code): every mask byte indexes the dword table
; nfpk_mov4.  The four bytes of the selected entry are stored over the
; second byte (edx = nf9_11+1, the register-select byte) of four
; `mov al, bl` / `mov ah, bl` instructions below, so each patched
; instruction fetches one pixel from one of the color registers.
; NOTE(review): nfpk_mov4 is defined outside this view; its contents are
; assumed from the way it is used here -- confirm against the table.
;
; Label scheme: nf9_RC = row R (1-8), C-th patched instruction of the row
; (1-8).  Mask bytes +4,+5 patch row 1 (nf9_1x), +6,+7 row 2, and so on up
; to +18,+19 for row 8.
mov eax, [esi]
cmp al, ah
ja nf41
shr eax, 16
cmp al, ah
ja nf25
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf9_11+1
; --- patch rows 1-4 (offsets relative to nf9_11) ---
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_11-nf9_11)], bl
mov [edx+(nf9_12-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_13-nf9_11)], bl
mov [edx+(nf9_14-nf9_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_15-nf9_11)], bl
mov [edx+(nf9_16-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_17-nf9_11)], bl
mov [edx+(nf9_18-nf9_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_21-nf9_11)], bl
mov [edx+(nf9_22-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_23-nf9_11)], bl
mov [edx+(nf9_24-nf9_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_25-nf9_11)], bl
mov [edx+(nf9_26-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_27-nf9_11)], bl
mov [edx+(nf9_28-nf9_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_31-nf9_11)], bl
mov [edx+(nf9_32-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_33-nf9_11)], bl
mov [edx+(nf9_34-nf9_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_35-nf9_11)], bl
mov [edx+(nf9_36-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_37-nf9_11)], bl
mov [edx+(nf9_38-nf9_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_41-nf9_11)], bl
mov [edx+(nf9_42-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_43-nf9_11)], bl
mov [edx+(nf9_44-nf9_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_45-nf9_11)], bl
mov [edx+(nf9_46-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_47-nf9_11)], bl
mov [edx+(nf9_48-nf9_11)], bh
; --- patch rows 5-8: rebase edx so offsets are relative to nf9_51 ---
lea edx, [edx+(nf9_51-nf9_11)]
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_51-nf9_51)], bl
mov [edx+(nf9_52-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_53-nf9_51)], bl
mov [edx+(nf9_54-nf9_51)], bh
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_55-nf9_51)], bl
mov [edx+(nf9_56-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_57-nf9_51)], bl
mov [edx+(nf9_58-nf9_51)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_61-nf9_51)], bl
mov [edx+(nf9_62-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_63-nf9_51)], bl
mov [edx+(nf9_64-nf9_51)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_65-nf9_51)], bl
mov [edx+(nf9_66-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_67-nf9_51)], bl
mov [edx+(nf9_68-nf9_51)], bh
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_71-nf9_51)], bl
mov [edx+(nf9_72-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_73-nf9_51)], bl
mov [edx+(nf9_74-nf9_51)], bh
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_75-nf9_51)], bl
mov [edx+(nf9_76-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_77-nf9_51)], bl
mov [edx+(nf9_78-nf9_51)], bh
mov al, [esi+18]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_81-nf9_51)], bl
mov [edx+(nf9_82-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_83-nf9_51)], bl
mov [edx+(nf9_84-nf9_51)], bh
mov al, [esi+19]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_85-nf9_51)], bl
mov [edx+(nf9_86-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_87-nf9_51)], bl
mov [edx+(nf9_88-nf9_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf9_0 ; flush prefetch
ALIGN 4
nf9_0:
; --- patched code: each row builds two dwords, high word first ---
nf9_11: mov al, bl
nf9_12: mov ah, bl
shl eax, 16
nf9_13: mov al, bl
nf9_14: mov ah, bl
mov [edi], eax
nf9_15: mov al, bl
nf9_16: mov ah, bl
shl eax, 16
nf9_17: mov al, bl
nf9_18: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_21: mov al, bl
nf9_22: mov ah, bl
shl eax, 16
nf9_23: mov al, bl
nf9_24: mov ah, bl
mov [edi], eax
nf9_25: mov al, bl
nf9_26: mov ah, bl
shl eax, 16
nf9_27: mov al, bl
nf9_28: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_31: mov al, bl
nf9_32: mov ah, bl
shl eax, 16
nf9_33: mov al, bl
nf9_34: mov ah, bl
mov [edi], eax
nf9_35: mov al, bl
nf9_36: mov ah, bl
shl eax, 16
nf9_37: mov al, bl
nf9_38: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_41: mov al, bl
nf9_42: mov ah, bl
shl eax, 16
nf9_43: mov al, bl
nf9_44: mov ah, bl
mov [edi], eax
nf9_45: mov al, bl
nf9_46: mov ah, bl
shl eax, 16
nf9_47: mov al, bl
nf9_48: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_51: mov al, bl
nf9_52: mov ah, bl
shl eax, 16
nf9_53: mov al, bl
nf9_54: mov ah, bl
mov [edi], eax
nf9_55: mov al, bl
nf9_56: mov ah, bl
shl eax, 16
nf9_57: mov al, bl
nf9_58: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_61: mov al, bl
nf9_62: mov ah, bl
shl eax, 16
nf9_63: mov al, bl
nf9_64: mov ah, bl
mov [edi], eax
nf9_65: mov al, bl
nf9_66: mov ah, bl
shl eax, 16
nf9_67: mov al, bl
nf9_68: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_71: mov al, bl
nf9_72: mov ah, bl
shl eax, 16
nf9_73: mov al, bl
nf9_74: mov ah, bl
mov [edi], eax
nf9_75: mov al, bl
nf9_76: mov ah, bl
shl eax, 16
nf9_77: mov al, bl
nf9_78: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_81: mov al, bl
nf9_82: mov ah, bl
shl eax, 16
nf9_83: mov al, bl
nf9_84: mov ah, bl
mov [edi], eax
nf9_85: mov al, bl
nf9_86: mov ah, bl
shl eax, 16
nf9_87: mov al, bl
nf9_88: mov ah, bl
mov [edi+4], eax
add esi, 20
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
;nf9+16
nf25: ; low 4x4x2 (8 bytes)
; Half-resolution four-color block: a 4x4 grid of 2-bit selectors, each
; selected color covering a 2x2 pixel square of the 8x8 block.
; Reached from nf9 by a conditional jump.
;
; Input bytes read relative to esi:
;   +0..+3   four colors (loaded into bl, bh, cl, ch)
;   +4..+7   four mask bytes, one per pair of output rows
; Output: each patched `mov` is followed by a register copy that doubles
; the pixel horizontally, and every dword is stored to both [edi+n] and
; [edi+edx+n] (edx = nf_width), doubling it vertically.
; On exit: esi += 8, edi has been moved back by nfpk_back_right.
; Uses eax, ebx, ecx, edx as scratch.
;
; Method (self-modifying code): every mask byte indexes the dword table
; nfpk_mov4; the four bytes of the entry overwrite the second byte
; (edx = nf25_11+1) of the four labelled instructions of one row pair.
; The entry bytes are written to labels x4, x3, x2, x1 in that order,
; i.e. the reverse of the label numbering.  In this routine the odd labels
; are `mov ah, bl` and the even labels `mov al, bl`, so the first and third
; entry bytes still land on `mov al` instructions and the second and
; fourth on `mov ah` instructions, exactly as in nf9.
; NOTE(review): nfpk_mov4 is defined outside this view; the reason for the
; reversed order is inferred from the instruction pattern -- confirm
; against the table.
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 8
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf25_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_14-nf25_11)], bl
mov [edx+(nf25_13-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_12-nf25_11)], bl
mov [edx+(nf25_11-nf25_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_24-nf25_11)], bl
mov [edx+(nf25_23-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_22-nf25_11)], bl
mov [edx+(nf25_21-nf25_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_34-nf25_11)], bl
mov [edx+(nf25_33-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_32-nf25_11)], bl
mov [edx+(nf25_31-nf25_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_44-nf25_11)], bl
mov [edx+(nf25_43-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_42-nf25_11)], bl
mov [edx+(nf25_41-nf25_11)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf25_0 ; flush prefetch
ALIGN 4
nf25_0:
; --- rows 0-1 ---
nf25_11:mov ah, bl
mov al, ah
shl eax, 16
nf25_12:mov al, bl
mov ah, al
mov [edi], eax
mov [edi+edx], eax
nf25_13:mov ah, bl
mov al, ah
shl eax, 16
nf25_14:mov al, bl
mov ah, al
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
; --- rows 2-3 ---
nf25_21:mov ah, bl
mov al, ah
shl eax, 16
nf25_22:mov al, bl
mov ah, al
mov [edi], eax
mov [edi+edx], eax
nf25_23:mov ah, bl
mov al, ah
shl eax, 16
nf25_24:mov al, bl
mov ah, al
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
; --- rows 4-5 ---
nf25_31:mov ah, bl
mov al, ah
shl eax, 16
nf25_32:mov al, bl
mov ah, al
mov [edi], eax
mov [edi+edx], eax
nf25_33:mov ah, bl
mov al, ah
shl eax, 16
nf25_34:mov al, bl
mov ah, al
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
; --- rows 6-7 ---
nf25_41:mov ah, bl
mov al, ah
shl eax, 16
nf25_42:mov al, bl
mov ah, al
mov [edi], eax
mov [edi+edx], eax
nf25_43:mov ah, bl
mov al, ah
shl eax, 16
nf25_44:mov al, bl
mov ah, al
mov [edi+4], eax
mov [edi+edx+4], eax
; step edi onto the last row (row 7) before the common back-up below
add edi, edx
add esi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+32
nf41: ; low 4x8x2 (12 bytes)
; Four-color block at half horizontal resolution: 8 rows of four 2-bit
; selectors, each selected color covering two horizontally adjacent pixels.
; Reached from nf9 by a conditional jump, with eax still holding the dword
; nf9 loaded from [esi]; the upper two color bytes are compared here and
; byte2 > byte3 (unsigned) diverts to nf57 instead.
;
; Input bytes read relative to esi:
;   +0..+3   four colors (loaded into bl, bh, cl, ch)
;   +4..+11  eight mask bytes, one per row
; Output: two dwords per row at [edi] and [edi+4]; edi steps by nf_width.
; On exit: esi += 12, edi has been moved back by nfpk_back_right.
; Uses eax, ebx, ecx, edx as scratch.
;
; Method (self-modifying code): every mask byte indexes the dword table
; nfpk_mov4; the four bytes of the entry overwrite the second byte
; (edx = nf41_11+1) of the four labelled instructions of one row.  Each
; patched `mov` is followed by a register copy that doubles the pixel.
; The entry bytes are written to labels x4, x3, x2, x1 in that order, the
; reverse of the label numbering (odd labels are `mov ah, bl`, even labels
; `mov al, bl`).
; NOTE(review): nfpk_mov4 is defined outside this view; confirm the
; reversed patch order against the table layout.
shr eax, 16
cmp al, ah
ja nf57
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf41_11+1
; --- patch rows 1-4 (offsets relative to nf41_11) ---
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_14-nf41_11)], bl
mov [edx+(nf41_13-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_12-nf41_11)], bl
mov [edx+(nf41_11-nf41_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_24-nf41_11)], bl
mov [edx+(nf41_23-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_22-nf41_11)], bl
mov [edx+(nf41_21-nf41_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_34-nf41_11)], bl
mov [edx+(nf41_33-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_32-nf41_11)], bl
mov [edx+(nf41_31-nf41_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_44-nf41_11)], bl
mov [edx+(nf41_43-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_42-nf41_11)], bl
mov [edx+(nf41_41-nf41_11)], bh
; --- patch rows 5-8: rebase edx so offsets are relative to nf41_51 ---
lea edx, [edx+(nf41_51-nf41_11)]
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_54-nf41_51)], bl
mov [edx+(nf41_53-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_52-nf41_51)], bl
mov [edx+(nf41_51-nf41_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_64-nf41_51)], bl
mov [edx+(nf41_63-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_62-nf41_51)], bl
mov [edx+(nf41_61-nf41_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_74-nf41_51)], bl
mov [edx+(nf41_73-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_72-nf41_51)], bl
mov [edx+(nf41_71-nf41_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_84-nf41_51)], bl
mov [edx+(nf41_83-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_82-nf41_51)], bl
mov [edx+(nf41_81-nf41_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf41_0 ; flush prefetch
ALIGN 4
nf41_0:
; --- patched code: one row per group of four labels ---
nf41_11:mov ah, bl
mov al, ah
shl eax, 16
nf41_12:mov al, bl
mov ah, al
mov [edi], eax
nf41_13:mov ah, bl
mov al, ah
shl eax, 16
nf41_14:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_21:mov ah, bl
mov al, ah
shl eax, 16
nf41_22:mov al, bl
mov ah, al
mov [edi], eax
nf41_23:mov ah, bl
mov al, ah
shl eax, 16
nf41_24:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_31:mov ah, bl
mov al, ah
shl eax, 16
nf41_32:mov al, bl
mov ah, al
mov [edi], eax
nf41_33:mov ah, bl
mov al, ah
shl eax, 16
nf41_34:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_41:mov ah, bl
mov al, ah
shl eax, 16
nf41_42:mov al, bl
mov ah, al
mov [edi], eax
nf41_43:mov ah, bl
mov al, ah
shl eax, 16
nf41_44:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_51:mov ah, bl
mov al, ah
shl eax, 16
nf41_52:mov al, bl
mov ah, al
mov [edi], eax
nf41_53:mov ah, bl
mov al, ah
shl eax, 16
nf41_54:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_61:mov ah, bl
mov al, ah
shl eax, 16
nf41_62:mov al, bl
mov ah, al
mov [edi], eax
nf41_63:mov ah, bl
mov al, ah
shl eax, 16
nf41_64:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_71:mov ah, bl
mov al, ah
shl eax, 16
nf41_72:mov al, bl
mov ah, al
mov [edi], eax
nf41_73:mov ah, bl
mov al, ah
shl eax, 16
nf41_74:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_81:mov ah, bl
mov al, ah
shl eax, 16
nf41_82:mov al, bl
mov ah, al
mov [edi], eax
nf41_83:mov ah, bl
mov al, ah
shl eax, 16
nf41_84:mov al, bl
mov ah, al
mov [edi+4], eax
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+48
nf57: ; low 8x4x2 (12 bytes)
; Four-color block at half vertical resolution: four rows of eight 2-bit
; selectors, each row written to two consecutive output rows.
; Reached from nf41 by a conditional jump.
;
; Input bytes read relative to esi:
;   +0..+3   four colors (loaded into bl, bh, cl, ch)
;   +4..+11  eight mask bytes, two per encoded row
; Output: every dword is stored to both [edi+n] and [edi+edx+n]
; (edx = nf_width); edi then steps by two rows.
; On exit: esi += 12, edi has been moved back by nfpk_back_right.
; Uses eax, ebx, ecx, edx as scratch.
;
; Method (self-modifying code): every mask byte indexes the dword table
; nfpk_mov4; the four bytes of the entry overwrite the second byte
; (edx = nf57_11+1) of four `mov al, bl` / `mov ah, bl` instructions below.
; NOTE(review): nfpk_mov4 is defined outside this view; its contents are
; assumed from the way it is used here -- confirm against the table.
;
; Label scheme: nf57_RC = encoded row R (1-4), C-th patched instruction of
; that row (1-8).
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf57_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_11-nf57_11)], bl
mov [edx+(nf57_12-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_13-nf57_11)], bl
mov [edx+(nf57_14-nf57_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_15-nf57_11)], bl
mov [edx+(nf57_16-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_17-nf57_11)], bl
mov [edx+(nf57_18-nf57_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_21-nf57_11)], bl
mov [edx+(nf57_22-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_23-nf57_11)], bl
mov [edx+(nf57_24-nf57_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_25-nf57_11)], bl
mov [edx+(nf57_26-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_27-nf57_11)], bl
mov [edx+(nf57_28-nf57_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_31-nf57_11)], bl
mov [edx+(nf57_32-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_33-nf57_11)], bl
mov [edx+(nf57_34-nf57_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_35-nf57_11)], bl
mov [edx+(nf57_36-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_37-nf57_11)], bl
mov [edx+(nf57_38-nf57_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_41-nf57_11)], bl
mov [edx+(nf57_42-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_43-nf57_11)], bl
mov [edx+(nf57_44-nf57_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_45-nf57_11)], bl
mov [edx+(nf57_46-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_47-nf57_11)], bl
mov [edx+(nf57_48-nf57_11)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf57_0 ; flush prefetch
ALIGN 4
nf57_0:
; --- output rows 0-1 ---
nf57_11:mov al, bl
nf57_12:mov ah, bl
shl eax, 16
nf57_13:mov al, bl
nf57_14:mov ah, bl
mov [edi], eax
mov [edi+edx], eax
nf57_15:mov al, bl
nf57_16:mov ah, bl
shl eax, 16
nf57_17:mov al, bl
nf57_18:mov ah, bl
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
; --- output rows 2-3 ---
nf57_21:mov al, bl
nf57_22:mov ah, bl
shl eax, 16
nf57_23:mov al, bl
nf57_24:mov ah, bl
mov [edi], eax
mov [edi+edx], eax
nf57_25:mov al, bl
nf57_26:mov ah, bl
shl eax, 16
nf57_27:mov al, bl
nf57_28:mov ah, bl
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
; --- output rows 4-5 ---
nf57_31:mov al, bl
nf57_32:mov ah, bl
shl eax, 16
nf57_33:mov al, bl
nf57_34:mov ah, bl
mov [edi], eax
mov [edi+edx], eax
nf57_35:mov al, bl
nf57_36:mov ah, bl
shl eax, 16
nf57_37:mov al, bl
nf57_38:mov ah, bl
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
; --- output rows 6-7 ---
nf57_41:mov al, bl
nf57_42:mov ah, bl
shl eax, 16
nf57_43:mov al, bl
nf57_44:mov ah, bl
mov [edi], eax
mov [edi+edx], eax
nf57_45:mov al, bl
nf57_46:mov ah, bl
shl eax, 16
nf57_47:mov al, bl
nf57_48:mov ah, bl
mov [edi+4], eax
mov [edi+edx+4], eax
; step edi onto the last row (row 7) before the common back-up below
add edi, edx
add esi, 12
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
nf10: ; 2x2 4x4x2 (32 bytes)
; 8x8 block split into four 4x4 quadrants, each with its own four colors
; and a 2-bit selector per pixel.
;
; The first two input bytes also choose the sub-format: byte0 > byte1
; (unsigned) diverts to nf26; otherwise the four-quadrant form is decoded
; here.
;
; Input bytes read relative to esi, 8 bytes per quadrant:
;   +0..+3   colors, +4..+7   masks   -> left  columns, rows 0-3
;   +8..+11  colors, +12..+15 masks   -> left  columns, rows 4-7
;   +16..+19 colors, +20..+23 masks   -> right columns, rows 0-3
;   +24..+27 colors, +28..+31 masks   -> right columns, rows 4-7
; Each mask byte covers one 4-pixel row of its quadrant.
; Output: one dword per row at [edi]; edi steps by nf_width (edx).  After
; the left column has been written edi is moved up 8 rows and right by
; 4 bytes to draw the right column.
; On exit: esi += 32, edi has been moved back by 4 and by nfpk_back_right.
; Uses eax, ebx, ecx, edx as scratch.
;
; Method (self-modifying code): every mask byte indexes the dword table
; nfpk_mov4; the four bytes of the entry overwrite the second byte
; (edx = nf10_11+1) of four `mov al, bl` / `mov ah, bl` instructions below.
; NOTE(review): nfpk_mov4 is defined outside this view; its contents are
; assumed from the way it is used here -- confirm against the table.
;
; Label scheme: each group of four labels (x1-x4 or x5-x8) is one 4-pixel
; row; nf10_1x-4x are the left column top to bottom, nf10_5x-8x the right.
mov ax, [esi]
cmp al, ah
ja nf26
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf10_11+1
; --- patch left column (offsets relative to nf10_11) ---
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_11-nf10_11)], bl
mov [edx+(nf10_12-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_13-nf10_11)], bl
mov [edx+(nf10_14-nf10_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_15-nf10_11)], bl
mov [edx+(nf10_16-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_17-nf10_11)], bl
mov [edx+(nf10_18-nf10_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_21-nf10_11)], bl
mov [edx+(nf10_22-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_23-nf10_11)], bl
mov [edx+(nf10_24-nf10_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_25-nf10_11)], bl
mov [edx+(nf10_26-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_27-nf10_11)], bl
mov [edx+(nf10_28-nf10_11)], bh
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_31-nf10_11)], bl
mov [edx+(nf10_32-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_33-nf10_11)], bl
mov [edx+(nf10_34-nf10_11)], bh
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_35-nf10_11)], bl
mov [edx+(nf10_36-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_37-nf10_11)], bl
mov [edx+(nf10_38-nf10_11)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_41-nf10_11)], bl
mov [edx+(nf10_42-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_43-nf10_11)], bl
mov [edx+(nf10_44-nf10_11)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_45-nf10_11)], bl
mov [edx+(nf10_46-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_47-nf10_11)], bl
mov [edx+(nf10_48-nf10_11)], bh
; --- patch right column: rebase edx so offsets are relative to nf10_51 ---
lea edx, [edx+(nf10_51-nf10_11)]
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_51-nf10_51)], bl
mov [edx+(nf10_52-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_53-nf10_51)], bl
mov [edx+(nf10_54-nf10_51)], bh
mov al, [esi+21]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_55-nf10_51)], bl
mov [edx+(nf10_56-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_57-nf10_51)], bl
mov [edx+(nf10_58-nf10_51)], bh
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_61-nf10_51)], bl
mov [edx+(nf10_62-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_63-nf10_51)], bl
mov [edx+(nf10_64-nf10_51)], bh
mov al, [esi+23]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_65-nf10_51)], bl
mov [edx+(nf10_66-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_67-nf10_51)], bl
mov [edx+(nf10_68-nf10_51)], bh
mov al, [esi+28]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_71-nf10_51)], bl
mov [edx+(nf10_72-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_73-nf10_51)], bl
mov [edx+(nf10_74-nf10_51)], bh
mov al, [esi+29]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_75-nf10_51)], bl
mov [edx+(nf10_76-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_77-nf10_51)], bl
mov [edx+(nf10_78-nf10_51)], bh
mov al, [esi+30]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_81-nf10_51)], bl
mov [edx+(nf10_82-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_83-nf10_51)], bl
mov [edx+(nf10_84-nf10_51)], bh
mov al, [esi+31]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_85-nf10_51)], bl
mov [edx+(nf10_86-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_87-nf10_51)], bl
mov [edx+(nf10_88-nf10_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf10_0 ; flush prefetch
ALIGN 4
nf10_0:
; --- left column, rows 0-3 (first quadrant's colors) ---
nf10_11:mov al, bl
nf10_12:mov ah, bl
shl eax, 16
nf10_13:mov al, bl
nf10_14:mov ah, bl
mov [edi], eax
add edi, edx
nf10_15:mov al, bl
nf10_16:mov ah, bl
shl eax, 16
nf10_17:mov al, bl
nf10_18:mov ah, bl
mov [edi], eax
add edi, edx
nf10_21:mov al, bl
nf10_22:mov ah, bl
shl eax, 16
nf10_23:mov al, bl
nf10_24:mov ah, bl
mov [edi], eax
add edi, edx
nf10_25:mov al, bl
nf10_26:mov ah, bl
shl eax, 16
nf10_27:mov al, bl
nf10_28:mov ah, bl
mov [edi], eax
add edi, edx
; --- left column, rows 4-7 ---
; Load bl,bh,cl,ch with four colors
mov bx, [esi+8]
mov cx, [esi+10]
nf10_31:mov al, bl
nf10_32:mov ah, bl
shl eax, 16
nf10_33:mov al, bl
nf10_34:mov ah, bl
mov [edi], eax
add edi, edx
nf10_35:mov al, bl
nf10_36:mov ah, bl
shl eax, 16
nf10_37:mov al, bl
nf10_38:mov ah, bl
mov [edi], eax
add edi, edx
nf10_41:mov al, bl
nf10_42:mov ah, bl
shl eax, 16
nf10_43:mov al, bl
nf10_44:mov ah, bl
mov [edi], eax
add edi, edx
nf10_45:mov al, bl
nf10_46:mov ah, bl
shl eax, 16
nf10_47:mov al, bl
nf10_48:mov ah, bl
mov [edi], eax
add edi, edx
; edi is now 8 rows below the block top: go back up 8 rows and 4 bytes to
; the right, i.e. to the top of the right column.
lea eax, [edx*8-4]
sub edi, eax
; --- right column, rows 0-3 ---
; Load bl,bh,cl,ch with four colors
mov bx, [esi+16]
mov cx, [esi+18]
nf10_51:mov al, bl
nf10_52:mov ah, bl
shl eax, 16
nf10_53:mov al, bl
nf10_54:mov ah, bl
mov [edi], eax
add edi, edx
nf10_55:mov al, bl
nf10_56:mov ah, bl
shl eax, 16
nf10_57:mov al, bl
nf10_58:mov ah, bl
mov [edi], eax
add edi, edx
nf10_61:mov al, bl
nf10_62:mov ah, bl
shl eax, 16
nf10_63:mov al, bl
nf10_64:mov ah, bl
mov [edi], eax
add edi, edx
nf10_65:mov al, bl
nf10_66:mov ah, bl
shl eax, 16
nf10_67:mov al, bl
nf10_68:mov ah, bl
mov [edi], eax
add edi, edx
; --- right column, rows 4-7 ---
; Load bl,bh,cl,ch with four colors
mov bx, [esi+24]
mov cx, [esi+26]
nf10_71:mov al, bl
nf10_72:mov ah, bl
shl eax, 16
nf10_73:mov al, bl
nf10_74:mov ah, bl
mov [edi], eax
add edi, edx
nf10_75:mov al, bl
nf10_76:mov ah, bl
shl eax, 16
nf10_77:mov al, bl
nf10_78:mov ah, bl
mov [edi], eax
add edi, edx
nf10_81:mov al, bl
nf10_82:mov ah, bl
shl eax, 16
nf10_83:mov al, bl
nf10_84:mov ah, bl
mov [edi], eax
add edi, edx
nf10_85:mov al, bl
nf10_86:mov ah, bl
shl eax, 16
nf10_87:mov al, bl
nf10_88:mov ah, bl
mov [edi], eax
add esi, 32
; undo the 4-byte shift into the right column before the common back-up
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+16
nf26: ; 2x1 4x8x2 (24 bytes)
; 8x8 block split into a left and a right 4x8 half, each with its own four
; colors and a 2-bit selector per pixel.
; Reached from nf10 by a conditional jump.  The two bytes at +12,+13 choose
; the sub-format: byte12 > byte13 (unsigned) diverts to nf42.
;
; Input bytes read relative to esi, 12 bytes per half:
;   +0..+3   colors, +4..+11  masks   -> left  half, rows 0-7
;   +12..+15 colors, +16..+23 masks   -> right half, rows 0-7
; Each mask byte covers one 4-pixel row of its half.
; Output: one dword per row at [edi]; edi steps by nf_width (edx).  After
; the left half edi is moved up 8 rows and right by 4 bytes.
; On exit: esi += 24, edi has been moved back by 4 and by nfpk_back_right.
; Uses eax, ebx, ecx, edx as scratch.
;
; Method (self-modifying code): every mask byte indexes the dword table
; nfpk_mov4; the four bytes of the entry overwrite the second byte
; (edx = nf26_11+1) of four `mov al, bl` / `mov ah, bl` instructions below.
; NOTE(review): nfpk_mov4 is defined outside this view; its contents are
; assumed from the way it is used here -- confirm against the table.
;
; Label scheme: each group of four labels (x1-x4 or x5-x8) is one 4-pixel
; row; nf26_1x-4x are the left half top to bottom, nf26_5x-8x the right.
mov ax, [esi+12]
cmp al, ah
ja nf42
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 24
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf26_11+1
; --- patch left half (offsets relative to nf26_11) ---
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_11-nf26_11)], bl
mov [edx+(nf26_12-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_13-nf26_11)], bl
mov [edx+(nf26_14-nf26_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_15-nf26_11)], bl
mov [edx+(nf26_16-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_17-nf26_11)], bl
mov [edx+(nf26_18-nf26_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_21-nf26_11)], bl
mov [edx+(nf26_22-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_23-nf26_11)], bl
mov [edx+(nf26_24-nf26_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_25-nf26_11)], bl
mov [edx+(nf26_26-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_27-nf26_11)], bl
mov [edx+(nf26_28-nf26_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_31-nf26_11)], bl
mov [edx+(nf26_32-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_33-nf26_11)], bl
mov [edx+(nf26_34-nf26_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_35-nf26_11)], bl
mov [edx+(nf26_36-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_37-nf26_11)], bl
mov [edx+(nf26_38-nf26_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_41-nf26_11)], bl
mov [edx+(nf26_42-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_43-nf26_11)], bl
mov [edx+(nf26_44-nf26_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_45-nf26_11)], bl
mov [edx+(nf26_46-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_47-nf26_11)], bl
mov [edx+(nf26_48-nf26_11)], bh
; --- patch right half: rebase edx so offsets are relative to nf26_51 ---
lea edx, [edx+(nf26_51-nf26_11)]
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_51-nf26_51)], bl
mov [edx+(nf26_52-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_53-nf26_51)], bl
mov [edx+(nf26_54-nf26_51)], bh
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_55-nf26_51)], bl
mov [edx+(nf26_56-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_57-nf26_51)], bl
mov [edx+(nf26_58-nf26_51)], bh
mov al, [esi+18]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_61-nf26_51)], bl
mov [edx+(nf26_62-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_63-nf26_51)], bl
mov [edx+(nf26_64-nf26_51)], bh
mov al, [esi+19]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_65-nf26_51)], bl
mov [edx+(nf26_66-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_67-nf26_51)], bl
mov [edx+(nf26_68-nf26_51)], bh
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_71-nf26_51)], bl
mov [edx+(nf26_72-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_73-nf26_51)], bl
mov [edx+(nf26_74-nf26_51)], bh
mov al, [esi+21]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_75-nf26_51)], bl
mov [edx+(nf26_76-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_77-nf26_51)], bl
mov [edx+(nf26_78-nf26_51)], bh
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_81-nf26_51)], bl
mov [edx+(nf26_82-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_83-nf26_51)], bl
mov [edx+(nf26_84-nf26_51)], bh
mov al, [esi+23]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_85-nf26_51)], bl
mov [edx+(nf26_86-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_87-nf26_51)], bl
mov [edx+(nf26_88-nf26_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf26_0 ; flush prefetch
ALIGN 4
nf26_0:
; --- left half, rows 0-7 ---
nf26_11:mov al, bl
nf26_12:mov ah, bl
shl eax, 16
nf26_13:mov al, bl
nf26_14:mov ah, bl
mov [edi], eax
add edi, edx
nf26_15:mov al, bl
nf26_16:mov ah, bl
shl eax, 16
nf26_17:mov al, bl
nf26_18:mov ah, bl
mov [edi], eax
add edi, edx
nf26_21:mov al, bl
nf26_22:mov ah, bl
shl eax, 16
nf26_23:mov al, bl
nf26_24:mov ah, bl
mov [edi], eax
add edi, edx
nf26_25:mov al, bl
nf26_26:mov ah, bl
shl eax, 16
nf26_27:mov al, bl
nf26_28:mov ah, bl
mov [edi], eax
add edi, edx
nf26_31:mov al, bl
nf26_32:mov ah, bl
shl eax, 16
nf26_33:mov al, bl
nf26_34:mov ah, bl
mov [edi], eax
add edi, edx
nf26_35:mov al, bl
nf26_36:mov ah, bl
shl eax, 16
nf26_37:mov al, bl
nf26_38:mov ah, bl
mov [edi], eax
add edi, edx
nf26_41:mov al, bl
nf26_42:mov ah, bl
shl eax, 16
nf26_43:mov al, bl
nf26_44:mov ah, bl
mov [edi], eax
add edi, edx
nf26_45:mov al, bl
nf26_46:mov ah, bl
shl eax, 16
nf26_47:mov al, bl
nf26_48:mov ah, bl
mov [edi], eax
add edi, edx
; edi is now 8 rows below the block top: go back up 8 rows and 4 bytes to
; the right, i.e. to the top of the right half.
lea eax, [edx*8-4]
sub edi, eax
; --- right half, rows 0-7 ---
; Load bl,bh,cl,ch with four colors
mov bx, [esi+12]
mov cx, [esi+14]
nf26_51:mov al, bl
nf26_52:mov ah, bl
shl eax, 16
nf26_53:mov al, bl
nf26_54:mov ah, bl
mov [edi], eax
add edi, edx
nf26_55:mov al, bl
nf26_56:mov ah, bl
shl eax, 16
nf26_57:mov al, bl
nf26_58:mov ah, bl
mov [edi], eax
add edi, edx
nf26_61:mov al, bl
nf26_62:mov ah, bl
shl eax, 16
nf26_63:mov al, bl
nf26_64:mov ah, bl
mov [edi], eax
add edi, edx
nf26_65:mov al, bl
nf26_66:mov ah, bl
shl eax, 16
nf26_67:mov al, bl
nf26_68:mov ah, bl
mov [edi], eax
add edi, edx
nf26_71:mov al, bl
nf26_72:mov ah, bl
shl eax, 16
nf26_73:mov al, bl
nf26_74:mov ah, bl
mov [edi], eax
add edi, edx
nf26_75:mov al, bl
nf26_76:mov ah, bl
shl eax, 16
nf26_77:mov al, bl
nf26_78:mov ah, bl
mov [edi], eax
add edi, edx
nf26_81:mov al, bl
nf26_82:mov ah, bl
shl eax, 16
nf26_83:mov al, bl
nf26_84:mov ah, bl
mov [edi], eax
add edi, edx
nf26_85:mov al, bl
nf26_86:mov ah, bl
shl eax, 16
nf26_87:mov al, bl
nf26_88:mov ah, bl
mov [edi], eax
add esi, 24
; undo the 4-byte shift into the right half before the common back-up
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+32
nf42: ; 1x2 8x4x2 (24 bytes)
; 8x8 block split into a top and a bottom 8x4 half, each with its own four
; colors and a 2-bit selector per pixel.
; Reached from nf26 by a conditional jump.
;
; Input bytes read relative to esi, 12 bytes per half:
;   +0..+3   colors, +4..+11  masks   -> rows 0-3
;   +12..+15 colors, +16..+23 masks   -> rows 4-7
; Two mask bytes cover one 8-pixel row.
; Output: two dwords per row at [edi] and [edi+4]; edi steps by nf_width.
; On exit: esi += 24, edi has been moved back by nfpk_back_right.
; Uses eax, ebx, ecx, edx as scratch.
;
; Method (self-modifying code): every mask byte indexes the dword table
; nfpk_mov4; the four bytes of the entry overwrite the second byte
; (edx = nf42_11+1) of four `mov al, bl` / `mov ah, bl` instructions below.
; NOTE(review): nfpk_mov4 is defined outside this view; its contents are
; assumed from the way it is used here -- confirm against the table.
;
; Label scheme: nf42_RC = row R (1-8), C-th patched instruction of the row
; (1-8).
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 24
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf42_11+1
; --- patch top half (offsets relative to nf42_11) ---
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_11-nf42_11)], bl
mov [edx+(nf42_12-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_13-nf42_11)], bl
mov [edx+(nf42_14-nf42_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_15-nf42_11)], bl
mov [edx+(nf42_16-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_17-nf42_11)], bl
mov [edx+(nf42_18-nf42_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_21-nf42_11)], bl
mov [edx+(nf42_22-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_23-nf42_11)], bl
mov [edx+(nf42_24-nf42_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_25-nf42_11)], bl
mov [edx+(nf42_26-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_27-nf42_11)], bl
mov [edx+(nf42_28-nf42_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_31-nf42_11)], bl
mov [edx+(nf42_32-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_33-nf42_11)], bl
mov [edx+(nf42_34-nf42_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_35-nf42_11)], bl
mov [edx+(nf42_36-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_37-nf42_11)], bl
mov [edx+(nf42_38-nf42_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_41-nf42_11)], bl
mov [edx+(nf42_42-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_43-nf42_11)], bl
mov [edx+(nf42_44-nf42_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_45-nf42_11)], bl
mov [edx+(nf42_46-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_47-nf42_11)], bl
mov [edx+(nf42_48-nf42_11)], bh
; --- patch bottom half: rebase edx so offsets are relative to nf42_51 ---
lea edx, [edx+(nf42_51-nf42_11)]
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_51-nf42_51)], bl
mov [edx+(nf42_52-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_53-nf42_51)], bl
mov [edx+(nf42_54-nf42_51)], bh
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_55-nf42_51)], bl
mov [edx+(nf42_56-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_57-nf42_51)], bl
mov [edx+(nf42_58-nf42_51)], bh
mov al, [esi+18]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_61-nf42_51)], bl
mov [edx+(nf42_62-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_63-nf42_51)], bl
mov [edx+(nf42_64-nf42_51)], bh
mov al, [esi+19]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_65-nf42_51)], bl
mov [edx+(nf42_66-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_67-nf42_51)], bl
mov [edx+(nf42_68-nf42_51)], bh
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_71-nf42_51)], bl
mov [edx+(nf42_72-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_73-nf42_51)], bl
mov [edx+(nf42_74-nf42_51)], bh
mov al, [esi+21]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_75-nf42_51)], bl
mov [edx+(nf42_76-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_77-nf42_51)], bl
mov [edx+(nf42_78-nf42_51)], bh
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_81-nf42_51)], bl
mov [edx+(nf42_82-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_83-nf42_51)], bl
mov [edx+(nf42_84-nf42_51)], bh
mov al, [esi+23]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_85-nf42_51)], bl
mov [edx+(nf42_86-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_87-nf42_51)], bl
mov [edx+(nf42_88-nf42_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf42_0 ; flush prefetch
ALIGN 4
nf42_0:
; --- rows 0-3 (top-half colors) ---
nf42_11:mov al, bl
nf42_12:mov ah, bl
shl eax, 16
nf42_13:mov al, bl
nf42_14:mov ah, bl
mov [edi], eax
nf42_15:mov al, bl
nf42_16:mov ah, bl
shl eax, 16
nf42_17:mov al, bl
nf42_18:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_21:mov al, bl
nf42_22:mov ah, bl
shl eax, 16
nf42_23:mov al, bl
nf42_24:mov ah, bl
mov [edi], eax
nf42_25:mov al, bl
nf42_26:mov ah, bl
shl eax, 16
nf42_27:mov al, bl
nf42_28:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_31:mov al, bl
nf42_32:mov ah, bl
shl eax, 16
nf42_33:mov al, bl
nf42_34:mov ah, bl
mov [edi], eax
nf42_35:mov al, bl
nf42_36:mov ah, bl
shl eax, 16
nf42_37:mov al, bl
nf42_38:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_41:mov al, bl
nf42_42:mov ah, bl
shl eax, 16
nf42_43:mov al, bl
nf42_44:mov ah, bl
mov [edi], eax
nf42_45:mov al, bl
nf42_46:mov ah, bl
shl eax, 16
nf42_47:mov al, bl
nf42_48:mov ah, bl
mov [edi+4], eax
add edi, edx
; --- rows 4-7 (bottom-half colors) ---
; Load bl,bh,cl,ch with four colors
mov bx, [esi+12]
mov cx, [esi+14]
nf42_51:mov al, bl
nf42_52:mov ah, bl
shl eax, 16
nf42_53:mov al, bl
nf42_54:mov ah, bl
mov [edi], eax
nf42_55:mov al, bl
nf42_56:mov ah, bl
shl eax, 16
nf42_57:mov al, bl
nf42_58:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_61:mov al, bl
nf42_62:mov ah, bl
shl eax, 16
nf42_63:mov al, bl
nf42_64:mov ah, bl
mov [edi], eax
nf42_65:mov al, bl
nf42_66:mov ah, bl
shl eax, 16
nf42_67:mov al, bl
nf42_68:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_71:mov al, bl
nf42_72:mov ah, bl
shl eax, 16
nf42_73:mov al, bl
nf42_74:mov ah, bl
mov [edi], eax
nf42_75:mov al, bl
nf42_76:mov ah, bl
shl eax, 16
nf42_77:mov al, bl
nf42_78:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_81:mov al, bl
nf42_82:mov ah, bl
shl eax, 16
nf42_83:mov al, bl
nf42_84:mov ah, bl
mov [edi], eax
nf42_85:mov al, bl
nf42_86:mov ah, bl
shl eax, 16
nf42_87:mov al, bl
nf42_88:mov ah, bl
mov [edi+4], eax
add esi, 24
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
nf11: ; 8x8x8 (64 bytes)
; Uncompressed block: copies 64 bytes from [esi] into 8 rows of 8 bytes
; starting at [edi], stepping edi by nf_width between rows.
; On exit: esi += 64, edi has been moved back by nfpk_back_right.
; Uses eax (copy scratch) and edx (row stride).
if 0 ;debug
        add     esi, 64
        mov     eax, 0fefefefeH
;       mov     ebx, eax
        mov     ebx, 0
        jmp     nf_solid
endif
        mov     edx, nf_width

        ; Row 0 is written in place; the remaining rows are generated
        ; below, each one advancing edi by one row first.
        mov     eax, [esi]
        mov     [edi], eax
        mov     eax, [esi+4]
        mov     [edi+4], eax

nf11_src = SWIDTH                       ;; source offset of the next row
        REPT    SHEIGHT-1
        add     edi, edx
        mov     eax, [esi+nf11_src]
        mov     [edi], eax
        mov     eax, [esi+nf11_src+4]
        mov     [edi+4], eax
nf11_src = nf11_src + SWIDTH
        ENDM

        add     esi, 64
        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8
        retn
;----------------------------------------
ALIGN 4
nf12: ; low 4x4x8 (16 bytes)
; Half-resolution raw block: each of the 16 input bytes becomes a 2x2 pixel
; square.  Every input dword supplies four squares, i.e. two output rows.
; On exit: esi += 16, edi has been moved back by nfpk_back_right.
; Uses eax, ebx (pixel expansion) and edx (row stride).

; Expand the two bytes in ax to al,al,ah,ah (memory order) in ebx and store
; that dword at byte column `col` of the current row and the row below it.
NF12_PAIR MACRO col
        mov     bl, ah
        mov     bh, ah
        shl     ebx, 16
        mov     bl, al
        mov     bh, al
        mov     [edi+col], ebx
        mov     [edi+edx+col], ebx
        ENDM

        mov     edx, nf_width

        ; Row pairs 0-1, 2-3 and 4-5: edi advances two rows after each.
nf12_src = 0
        REPT    3
        mov     eax, [esi+nf12_src]
        NF12_PAIR 0
        shr     eax, 16
        NF12_PAIR 4
        lea     edi, [edi+edx*2]
nf12_src = nf12_src + 4
        ENDM

        ; Row pair 6-7: finish with edi on the last row (row 7).
        mov     eax, [esi+12]
        NF12_PAIR 0
        shr     eax, 16
        NF12_PAIR 4
        add     edi, edx

        sub     edi, nfpk_back_right
        add     esi, 16
        retn
;----------------------------------------
ALIGN 4
nf13: ; 2x2 4x4x0 (4 bytes)
; Four solid 4x4 quadrants, one color byte each:
; [esi]=top-left, [esi+1]=top-right, [esi+2]=bottom-left, [esi+3]=bottom-right.
; In:  esi -> compressed data, edi -> top-left byte of the square
; Out: esi advanced by 4; edi = entry edi + 7*nf_width - nfpk_back_right
; Clobbers: eax, ebx, cx, edx, flags
mov edx, nf_width ; edx = row stride in bytes
; --- top half: replicate each color byte into all four bytes ---
mov cl, [esi]
mov ch, cl
mov ax, cx
shl eax, 16
mov ax, cx ; eax = left color x4
mov cl, [esi+1]
mov ch, cl
mov bx, cx
shl ebx, 16
mov bx, cx ; ebx = right color x4
REPEAT 4 ; rows 0..3
mov [edi], eax
mov [edi+4], ebx
add edi, edx
ENDM
; --- bottom half ---
mov cl, [esi+2]
mov ch, cl
mov ax, cx
shl eax, 16
mov ax, cx
mov cl, [esi+3]
mov ch, cl
mov bx, cx
shl ebx, 16
mov bx, cx
REPEAT 3 ; rows 4..6
mov [edi], eax
mov [edi+4], ebx
add edi, edx
ENDM
mov [edi], eax ; row 7
mov [edi+4], ebx
sub edi, nfpk_back_right
add esi, 4
retn
;----------------------------------------
ALIGN 4
nf14: ; 8x8x0 (1 byte)
; Solid square: the single color byte at [esi] is replicated into all four
; bytes of both eax and ebx, then nf_solid fills the square with them.
; In:  esi -> compressed data, edi -> top-left byte of the square
; Out: esi advanced by 1; edi as left by nf_solid
; Clobbers: eax, ebx, plus whatever nf_solid clobbers
if 0 ;debug
jmp nf0
endif
mov bl, [esi] ; Copy color into 8 positions
inc esi
mov bh, bl ; bx = color:color
mov eax, ebx
shl eax, 16
mov ax, bx ; eax = color in all four bytes
mov ebx, eax ; even and odd rows use the same pattern
if 0 ;debug
mov eax, 080808080h
mov ebx, eax
endif
; Tail jump: nf_solid ends in retn, which returns to our caller. The
; unreachable retn that used to follow this jmp has been removed.
jmp nf_solid
;----------------------------------------
ALIGN 4
nf15: ; mix 8x8x0 (2 bytes)
; Dithered square: the two color bytes at [esi] alternate along each row,
; and odd rows use the pattern rotated by one byte, giving a checkerboard.
; Falls through into nf_solid.
; In:  esi -> compressed data, edi -> top-left byte of the square
; Out: esi advanced by 2; edi = entry edi + 7*nf_width - nfpk_back_right
; Clobbers: eax, ebx, edx, flags
if 0 ;debug
inc esi
jmp nf0
endif
mov ax, [esi] ; Copy 2 colors into 8 positions
add esi, 2 ; in a checkerboard
mov bx, ax
shl eax, 16
mov ax, bx ; eax = c1:c0:c1:c0 (even rows)
mov ebx, eax
rol ebx, 8 ; ebx = same pattern shifted one byte (odd rows)
if 0 ;debug
mov eax, 080808080h
mov ebx, eax
endif
nf_solid:
; Shared fill: even rows (0,2,4,6) receive eax in both dwords, odd rows
; (1,3,5,7) receive ebx. Expects eax/ebx set up by the caller.
mov edx, nf_width ; edx = row stride in bytes
REPEAT 3 ; row pairs (0,1), (2,3), (4,5)
mov [edi], eax
mov [edi+4], eax
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
add edi, edx
ENDM
mov [edi], eax ; row 6
mov [edi+4], eax
add edi, edx
mov [edi], ebx ; row 7
mov [edi+4], ebx
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
nfPkDecomp ENDP
; Half vertical resolution version (skip odd lines)
;
; Each 8x8 square of the stream is written as 8 pixels x 4 rows: the
; handlers below emit only every other source row.
;
; ops  -> opcode stream; each byte carries two 4-bit opcodes
;         (low nibble is executed first, then the high nibble).
; comp -> opcode parameter (pixel/mask) data, consumed through esi.
; w, h : the row loop dispatches w/2 opcode bytes per row, for h rows.
; x, y : not referenced in the code visible here (presumably consumed
;        by NF_DECOMP_INIT - TODO confirm).
; tbuf and DiffBufPtrs are not assigned in the code visible here
; (presumably set up by NF_DECOMP_INIT - TODO confirm).
;
; Register roles while decoding: esi = parameter data pointer,
; edi = destination pointer for the current square.
nfPkDecompH PROC USES ESI EDI EBX, \
ops:PTRBYTE, comp:PTRBYTE, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD
LOCAL tbuf: PTRBYTE
LOCAL new_row:DWORD
LOCAL DiffBufPtrs:DWORD
LOCAL nfpk_back_right: DWORD
LOCAL wcnt:DWORD
LOG_LABEL "StartPkDecomp"
.data
; Handler table indexed by 4-bit opcode.
nfpk_OpTblH label dword
dword offset nf0 ; Prev Same (0)
dword offset nf1 ; No change (and copied to screen) (0)
dword offset nf2 ; Near shift from older part of current buf (1)
dword offset nf3 ; Near shift from newer part of current buf (1)
dword offset nf4 ; Near shift from previous buffer (1)
dword offset nf5 ; Far shift from previous buffer (2)
dword offset nf6 ; Far shift from current buffer (2)
; [Or if COMPOPS, run of no changes (0)]
dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 8x4x2 (24 bytes)
dword offset nf11 ; 8x8x8 (64 bytes)
dword offset nf12 ; low 4x4x8 (16 bytes)
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
dword offset nf14 ; 8x8x0 (1 byte)
dword offset nf15 ; mix 8x8x0 (2 bytes)
.code
NF_DECOMP_INIT 0
; new_row = 4*nf_width - nf_new_w: added to edi at the end of each row
; of squares (see nf_NextRow).
mov eax, nf_width
shl eax, 2
sub eax, nf_new_w
mov new_row, eax
shr nf_new_h, 1 ; halve nf_new_h (half-height output)
; nfpk_back_right = 3*nf_width - SWIDTH. Handlers finish with edi three
; rows below the top of the square; subtracting this value leaves edi
; SWIDTH bytes to the right of where the square started.
mov eax, nf_width
lea eax, [eax*2+eax-SWIDTH]
mov nfpk_back_right, eax
mov esi, comp
mov edi, tbuf
nf_StartRow:
mov eax, w
shr eax, 1 ; one opcode byte covers two squares
mov wcnt,eax
ALIGN 4
nf_NextPair:
dec wcnt
js nf_NextRow ; all pairs of this row done
mov ebx, ops
mov al, [ebx] ; fetch next opcode byte
inc ebx
mov ops, ebx
xor ebx, ebx
mov bl, al
shr bl, 4 ; ebx = high nibble (second opcode)
and eax, 0Fh ; eax = low nibble (first opcode)
; Chain the two handlers through the stack: the first handler's retn
; pops the second handler's address, and the second handler's retn
; returns to nf_NextPair.
push offset nf_NextPair
push nfpk_OpTblH[ebx*4]
jmp nfpk_OpTblH[eax*4]
nf_NextRow:
add edi, new_row
dec h
jnz nf_StartRow
LOG_LABEL "EndPkDecomp"
ret
;----------------------------------------
ALIGN 4
nf0: ; No change from previous buffer
; Hands DiffBufPtrs to nf_shift as the byte displacement from edi to the
; source square (presumably previous-buffer minus current-buffer address;
; the value is set outside the code visible here).
mov eax, DiffBufPtrs
jmp nf_shift
;----------------------------------------
ALIGN 4
nf1: ; No change (and copied to screen)
; Nothing is written: just step edi to the next square on this row.
add edi, SWIDTH
retn
;----------------------------------------
ALIGN 4
nf2: ; Near shift from older part of current buffer
; One parameter byte indexes the word table nfpk_ShiftP2 (defined
; elsewhere); the entry is treated as al = x offset, ah = y offset.
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP2[eax*2]
nf_xyc_shift:
; Shared tail (also entered from nf3): turn signed (x=al, y=ah) into a
; byte displacement in eax and copy from the current buffer.
xor ebx, ebx
mov bl, ah ; bl = y
shl eax, 24
sar eax, 24 ; eax = sign-extended x
; Halve y, rounding toward zero: add 80h sets CF when y is negative,
; adc 80h then restores y plus that carry, and sar divides by two.
add bl, 080h
adc bl, 080h
sar bl, 1
; ebx is the halved y as an unsigned byte index (upper bits are zero).
add eax, nfpk_ShiftY[ebx*4]
jmp nf_shift
;----------------------------------------
ALIGN 4
nf3: ; Near shift from newer part of current buffer
; Same table lookup as nf2, but both offsets (al = x, ah = y) are negated
; before joining the shared nf_xyc_shift tail.
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP2[eax*2]
neg al
neg ah
jmp nf_xyc_shift
;----------------------------------------
ALIGN 4
nf4: ; Near shift from previous buffer
; One parameter byte indexes the word table nfpk_ShiftP1 (defined
; elsewhere); the (al = x, ah = y) result is handled by nf_xyp_shift.
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP1[eax*2]
jmp nf_xyp_shift
;----------------------------------------
ALIGN 4
nf5: ; Far shift from previous buffer
; Two parameter bytes give the offsets directly: al = x, ah = y.
mov ax, [esi]
add esi, 2
nf_xyp_shift:
; Shared tail (also entered from nf4): same displacement computation as
; nf_xyc_shift, plus DiffBufPtrs so the source lies in the other buffer.
xor ebx, ebx
mov bl, ah ; bl = y
shl eax, 24
sar eax, 24 ; eax = sign-extended x
; Halve y, rounding toward zero (CF from add 80h is set when y < 0).
add bl, 080h
adc bl, 080h
sar bl, 1
add eax, nfpk_ShiftY[ebx*4]
add eax, DiffBufPtrs
jmp nf_shift
;----------------------------------------
ALIGN 4
nf6: ; Run of no changes (must only appear in first nibble opcodes)
; Next nibble k specifies 2k+4 squares with no changes
; On entry ebx = high nibble of the opcode byte (set by the dispatcher)
; and the stack holds [second handler][nf_NextPair].
add esp, 4 ; Next nibble is not an opcode
add ebx, 2 ; (minimum of 4 squares)
; ebx now counts pairs of squares to skip (k+2 pairs = 2k+4 squares).
ALIGN 4
nf6a: add edi, SWIDTH*2 ; Advance over two squares
dec ebx
jz nf6z ; Last pair of squares
dec wcnt ; Same row?
jns nf6a ; Yes
add edi, new_row ; Advance to next row
dec h ; Decrement row count (should never become zero here)
mov eax, w ; Reset wcnt
shr eax ,1
dec eax ; the pair skipped next is already accounted for
mov wcnt, eax
jmp nf6a
nf6z: retn
;----------------------------------------
ALIGN 4
nf_shift:
; Copy an 8-byte x 4-row square from [edi+eax] to [edi].
; In:  eax = byte displacement from destination to source,
;      edi -> top-left byte of the destination square
; Out: edi = entry edi + 3*nf_width - nfpk_back_right; esi preserved
; Clobbers: eax, ebx (holds the saved esi), edx, flags
; Dwords are loaded and stored in the same top-to-bottom, left-to-right
; order as a plain row loop.
if 0 ;debug
mov eax, 0
mov ebx, eax
jmp nf_solid
endif
mov ebx, esi ; save esi
lea esi, [edi+eax] ; esi -> source square
mov edx, nf_width ; edx = row stride in bytes
; rows 0 and 1
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
mov eax, [esi+edx]
mov [edi+edx], eax
mov eax, [esi+edx+4]
mov [edi+edx+4], eax
lea esi, [esi+edx*2]
lea edi, [edi+edx*2]
; rows 2 and 3
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
mov eax, [esi+edx]
mov [edi+edx], eax
mov eax, [esi+edx+4]
mov [edi+edx+4], eax
add edi, edx ; edi -> row 3, as the final adjust expects
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
mov esi, ebx ; restore esi
retn
;----------------------------------------
ALIGN 4
nf7: ; 8x8x1 (10 bytes)
; Two-color square, one mask byte per 8-pixel row. This format is used
; when byte [esi] <= byte [esi+1] (unsigned); otherwise control goes to
; nf23. Half-height: only mask bytes 2, 4, 6 and 8 are used, one per
; output row.
; Self-modifying: for each row the mask byte indexes the dword table
; nfpk_mov8 (defined elsewhere) and its four bytes overwrite the byte at
; offset +2 of the four `mov ax, bx` instructions of that row
; (presumably the register-select byte, choosing bx/dx/cx/bp).
; Out: esi advanced by 10; edi = entry edi + 3*nf_width - nfpk_back_right
mov ax, [esi]
cmp al, ah
ja nf23
if 0 ;debug
add esi, 10
mov eax, 0fefefefeH
mov ebx, eax
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov8 ; ecx -> patch table
lea edx, byte ptr ds:nf7_11+2 ; edx -> patched byte of first mov
mov al, [esi+2] ; row 0 mask
mov ebx, [ecx+eax*4]
mov [edx+(nf7_11-nf7_11)], bl
mov [edx+(nf7_12-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_13-nf7_11)], bl
mov [edx+(nf7_14-nf7_11)], bh
mov al, [esi+4] ; row 1 mask
mov ebx, [ecx+eax*4]
mov [edx+(nf7_31-nf7_11)], bl
mov [edx+(nf7_32-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_33-nf7_11)], bl
mov [edx+(nf7_34-nf7_11)], bh
lea edx, [edx+(nf7_51-nf7_11)] ; rebase edx on nf7_51
mov al, [esi+6] ; row 2 mask
mov ebx, [ecx+eax*4]
mov [edx+(nf7_51-nf7_51)], bl
mov [edx+(nf7_52-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_53-nf7_51)], bl
mov [edx+(nf7_54-nf7_51)], bh
mov al, [esi+8] ; row 3 mask
mov ebx, [ecx+eax*4]
mov [edx+(nf7_71-nf7_51)], bl
mov [edx+(nf7_72-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_73-nf7_51)], bl
mov [edx+(nf7_74-nf7_51)], bh
; ebp and esi are reused below (ebp as a color pair, esi as the row
; stride), so both are saved until the rows have been written.
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi,nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf7_0 ; flush prefetch
ALIGN 4
nf7_0:
; Each patched mov supplies two pixels; four of them make one row.
nf7_11: mov ax, bx
shl eax, 16
nf7_12: mov ax, bx
mov [edi], eax
nf7_13: mov ax, bx
shl eax, 16
nf7_14: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_31: mov ax, bx
shl eax, 16
nf7_32: mov ax, bx
mov [edi], eax
nf7_33: mov ax, bx
shl eax, 16
nf7_34: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_51: mov ax, bx
shl eax, 16
nf7_52: mov ax, bx
mov [edi], eax
nf7_53: mov ax, bx
shl eax, 16
nf7_54: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_71: mov ax, bx
shl eax, 16
nf7_72: mov ax, bx
mov [edi], eax
nf7_73: mov ax, bx
shl eax, 16
nf7_74: mov ax, bx
mov [edi+4], eax
pop esi
pop ebp ; must be restored before nfpk_back_right (a LOCAL) is read
add esi, 10
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf7+16
nf23: ; low 4x4x1 (4 bytes)
; Reached from nf7 when byte [esi] > byte [esi+1] (unsigned).
; Two colors plus 16 mask bits (bytes 2 and 3). Each 4-bit nibble
; produces one 8-pixel output row: low nibble first, then high nibble.
; Self-modifying: each nibble indexes the dword table nfpk_mov4l
; (defined elsewhere); its four bytes overwrite the byte at offset +2 of
; the row's four `mov ax, bx` instructions (presumably selecting bx or
; cx as the source).
; Out: esi advanced by 4; edi = entry edi + 3*nf_width - nfpk_back_right
xor eax, eax
lea ecx, nfpk_mov4l ; ecx -> patch table
lea edx, byte ptr ds:nf23_11+2 ; edx -> patched byte of first mov
mov al, [esi+2]
and al, 0fH ; row 0: low nibble of byte 2
mov ebx, [ecx+eax*4]
mov [edx+(nf23_11-nf23_11)], bl
mov [edx+(nf23_12-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_13-nf23_11)], bl
mov [edx+(nf23_14-nf23_11)], bh
mov al, [esi+2]
shr al, 4 ; row 1: high nibble of byte 2
mov ebx, [ecx+eax*4]
mov [edx+(nf23_31-nf23_11)], bl
mov [edx+(nf23_32-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_33-nf23_11)], bl
mov [edx+(nf23_34-nf23_11)], bh
mov al, [esi+3]
and al, 0fH ; row 2: low nibble of byte 3
mov ebx, [ecx+eax*4]
mov [edx+(nf23_51-nf23_11)], bl
mov [edx+(nf23_52-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_53-nf23_11)], bl
mov [edx+(nf23_54-nf23_11)], bh
mov al, [esi+3]
shr al, 4 ; row 3: high nibble of byte 3
mov ebx, [ecx+eax*4]
mov [edx+(nf23_71-nf23_11)], bl
mov [edx+(nf23_72-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_73-nf23_11)], bl
mov [edx+(nf23_74-nf23_11)], bh
mov edx, nf_width ; edx = row stride in bytes
; load bx,cx with 00,11 color combinations
mov bx, [esi]
mov cl, bh
mov bh, bl
mov ch, cl
jmp nf23_0 ; flush prefetch
ALIGN 4
nf23_0:
nf23_11:mov ax, bx
shl eax, 16
nf23_12:mov ax, bx
mov [edi], eax
nf23_13:mov ax, bx
shl eax, 16
nf23_14:mov ax, bx
mov [edi+4], eax
add edi, edx
nf23_31:mov ax, bx
shl eax, 16
nf23_32:mov ax, bx
mov [edi], eax
nf23_33:mov ax, bx
shl eax, 16
nf23_34:mov ax, bx
mov [edi+4], eax
add edi, edx
nf23_51:mov ax, bx
shl eax, 16
nf23_52:mov ax, bx
mov [edi], eax
nf23_53:mov ax, bx
shl eax, 16
nf23_54:mov ax, bx
mov [edi+4], eax
add edi, edx
nf23_71:mov ax, bx
shl eax, 16
nf23_72:mov ax, bx
mov [edi], eax
nf23_73:mov ax, bx
shl eax, 16
nf23_74:mov ax, bx
mov [edi+4], eax
sub edi, nfpk_back_right
add esi, 4
retn
;----------------------------------------
ALIGN 4
nf8: ; 2x2 4x4x1 (16 bytes)
; Four quadrants, each with its own 2 colors and 2 mask bytes
; (quadrant data at esi+0, +4, +8, +12). This format is used when
; byte [esi] <= byte [esi+1] (unsigned); otherwise control goes to nf24.
; Half-height: each quadrant becomes 4 pixels x 2 rows, and only the low
; two bytes of each nfpk_mov8 entry (one 4-pixel row) are used per mask
; byte. Quadrants are written in the order: left column top, left
; column bottom, right column top, right column bottom.
; Self-modifying: patches the byte at offset +2 of each `mov ax, bx`.
; Out: esi advanced by 16; edi = entry edi + 3*nf_width - nfpk_back_right
mov ax, [esi]
cmp al, ah
ja nf24
xor eax, eax
lea ecx, nfpk_mov8 ; ecx -> patch table
lea edx, byte ptr ds:nf8_11+2 ; edx -> patched byte of first mov
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_11-nf8_11)], bl
mov [edx+(nf8_12-nf8_11)], bh
mov al, [esi+3]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_21-nf8_11)], bl
mov [edx+(nf8_22-nf8_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_31-nf8_11)], bl
mov [edx+(nf8_32-nf8_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_41-nf8_11)], bl
mov [edx+(nf8_42-nf8_11)], bh
add edx, nf8_51-nf8_11 ; rebase edx on nf8_51
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_51-nf8_51)], bl
mov [edx+(nf8_52-nf8_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_61-nf8_51)], bl
mov [edx+(nf8_62-nf8_51)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_71-nf8_51)], bl
mov [edx+(nf8_72-nf8_51)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_81-nf8_51)], bl
mov [edx+(nf8_82-nf8_51)], bh
; ebp and esi are reused below (ebp as a color pair, esi as the row
; stride); the saved esi stays at [esp] so later color pairs can be read.
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf8_0 ; flush prefetch
ALIGN 4
nf8_0:
; --- first quadrant (left column, rows 0-1) ---
nf8_11: mov ax, bx
shl eax, 16
nf8_12: mov ax, bx
mov [edi], eax
add edi, esi
nf8_21: mov ax, bx
shl eax, 16
nf8_22: mov ax, bx
mov [edi], eax
add edi, esi
; --- second quadrant (left column, rows 2-3): colors at +4 ---
mov eax, [esp] ; eax = saved data pointer
mov cx, [eax+4]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf8_31: mov ax, bx
shl eax, 16
nf8_32: mov ax, bx
mov [edi], eax
add edi, esi
nf8_41: mov ax, bx
shl eax, 16
nf8_42: mov ax, bx
mov [edi], eax
add edi, esi
; Back up four rows and step 4 bytes right to the top of the right column.
lea eax, [esi*4-4]
sub edi, eax
; --- third quadrant (right column, rows 0-1): colors at +8 ---
mov eax, [esp]
mov cx, [eax+8]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf8_51: mov ax, bx
shl eax, 16
nf8_52: mov ax, bx
mov [edi], eax
add edi, esi
nf8_61: mov ax, bx
shl eax, 16
nf8_62: mov ax, bx
mov [edi], eax
add edi, esi
; --- fourth quadrant (right column, rows 2-3): colors at +12 ---
mov eax, [esp]
mov cx, [eax+12]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf8_71: mov ax, bx
shl eax, 16
nf8_72: mov ax, bx
mov [edi], eax
add edi, esi
nf8_81: mov ax, bx
shl eax, 16
nf8_82: mov ax, bx
mov [edi], eax
pop esi
pop ebp ; must be restored before nfpk_back_right (a LOCAL) is read
add esi, 16
sub edi, 4 ; undo the right-column offset
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+16
nf24: ; 2x1 4x8x1 (12 bytes)
; Reached from nf8. Left and right halves each have 2 colors and 4 mask
; bytes (left data at esi+0, right data at esi+6). This format is used
; when byte [esi+6] <= byte [esi+7] (unsigned); otherwise control goes
; to nf40.
; Half-height: each mask byte yields one 4-pixel row (only the low two
; bytes of its nfpk_mov8 entry are used), 4 rows per half.
; Self-modifying: patches the byte at offset +2 of each `mov ax, bx`.
; Out: esi advanced by 12; edi = entry edi + 3*nf_width - nfpk_back_right
mov ax, [esi+6]
cmp al, ah
ja nf40
xor eax, eax
lea ecx, nfpk_mov8 ; ecx -> patch table
lea edx, byte ptr ds:nf24_11+2 ; edx -> patched byte of first mov
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_11-nf24_11)], bl
mov [edx+(nf24_12-nf24_11)], bh
mov al, [esi+3]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_21-nf24_11)], bl
mov [edx+(nf24_22-nf24_11)], bh
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_31-nf24_11)], bl
mov [edx+(nf24_32-nf24_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_41-nf24_11)], bl
mov [edx+(nf24_42-nf24_11)], bh
add edx, nf24_51-nf24_11 ; rebase edx on nf24_51
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_51-nf24_51)], bl
mov [edx+(nf24_52-nf24_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_61-nf24_51)], bl
mov [edx+(nf24_62-nf24_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_71-nf24_51)], bl
mov [edx+(nf24_72-nf24_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_81-nf24_51)], bl
mov [edx+(nf24_82-nf24_51)], bh
; ebp and esi are reused below (ebp as a color pair, esi as the row
; stride); the saved esi stays at [esp] for the second color pair.
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf24_0 ; flush prefetch
ALIGN 4
nf24_0:
; --- left half, rows 0-3 ---
nf24_11:mov ax, bx
shl eax, 16
nf24_12:mov ax, bx
mov [edi], eax
add edi, esi
nf24_21:mov ax, bx
shl eax, 16
nf24_22:mov ax, bx
mov [edi], eax
add edi, esi
nf24_31:mov ax, bx
shl eax, 16
nf24_32:mov ax, bx
mov [edi], eax
add edi, esi
nf24_41:mov ax, bx
shl eax, 16
nf24_42:mov ax, bx
mov [edi], eax
add edi, esi
; Back up four rows and step 4 bytes right to the top of the right half.
lea eax, [esi*4-4]
sub edi, eax
; --- right half, rows 0-3: colors at +6 ---
mov eax, [esp] ; eax = saved data pointer
mov cx, [eax+6]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf24_51:mov ax, bx
shl eax, 16
nf24_52:mov ax, bx
mov [edi], eax
add edi, esi
nf24_61:mov ax, bx
shl eax, 16
nf24_62:mov ax, bx
mov [edi], eax
add edi, esi
nf24_71:mov ax, bx
shl eax, 16
nf24_72:mov ax, bx
mov [edi], eax
add edi, esi
nf24_81:mov ax, bx
shl eax, 16
nf24_82:mov ax, bx
mov [edi], eax
pop esi
pop ebp ; must be restored before nfpk_back_right (a LOCAL) is read
add esi, 12
sub edi, 4 ; undo the right-half offset
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+32
nf40: ; 1x2 8x4x1 (12 bytes)
; Reached from nf24 when byte [esi+6] > byte [esi+7] (unsigned).
; Top and bottom halves each have 2 colors and 4 mask bytes, one per
; 8-pixel row (top data at esi+0, bottom data at esi+6).
; Half-height: mask bytes 2 and 4 give the two top rows, 8 and 10 the
; two bottom rows.
; Self-modifying: patches the byte at offset +2 of each `mov ax, bx`
; with bytes from nfpk_mov8.
; Out: esi advanced by 12; edi = entry edi + 3*nf_width - nfpk_back_right
xor eax, eax
lea ecx, nfpk_mov8 ; ecx -> patch table
lea edx, byte ptr ds:nf40_11+2 ; edx -> patched byte of first mov
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_11-nf40_11)], bl
mov [edx+(nf40_12-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_13-nf40_11)], bl
mov [edx+(nf40_14-nf40_11)], bh
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_31-nf40_11)], bl
mov [edx+(nf40_32-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_33-nf40_11)], bl
mov [edx+(nf40_34-nf40_11)], bh
add edx, nf40_51-nf40_11 ; rebase edx on nf40_51
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_51-nf40_51)], bl
mov [edx+(nf40_52-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_53-nf40_51)], bl
mov [edx+(nf40_54-nf40_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_71-nf40_51)], bl
mov [edx+(nf40_72-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_73-nf40_51)], bl
mov [edx+(nf40_74-nf40_51)], bh
; ebp and esi are reused below (ebp as a color pair, esi as the row
; stride); the saved esi stays at [esp] for the second color pair.
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf40_0 ; flush prefetch
ALIGN 4
nf40_0:
; --- top half, rows 0-1 ---
nf40_11:mov ax, bx
shl eax, 16
nf40_12:mov ax, bx
mov [edi], eax
nf40_13:mov ax, bx
shl eax, 16
nf40_14:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_31:mov ax, bx
shl eax, 16
nf40_32:mov ax, bx
mov [edi], eax
nf40_33:mov ax, bx
shl eax, 16
nf40_34:mov ax, bx
mov [edi+4], eax
add edi, esi
; --- bottom half, rows 2-3: colors at +6 ---
mov eax, [esp] ; eax = saved data pointer
mov cx, [eax+6]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf40_51:mov ax, bx
shl eax, 16
nf40_52:mov ax, bx
mov [edi], eax
nf40_53:mov ax, bx
shl eax, 16
nf40_54:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_71:mov ax, bx
shl eax, 16
nf40_72:mov ax, bx
mov [edi], eax
nf40_73:mov ax, bx
shl eax, 16
nf40_74:mov ax, bx
mov [edi+4], eax
pop esi
pop ebp ; must be restored before nfpk_back_right (a LOCAL) is read
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
nf9: ; 8x8x2 (20 bytes)
; Four-color square, 2 bits per pixel: 4 color bytes, then 2 mask bytes
; per 8-pixel row. Format selection on the color bytes (unsigned):
;   byte0 >  byte1                 -> nf41 (which may go on to nf57)
;   byte0 <= byte1, byte2 >  byte3 -> nf25
;   byte0 <= byte1, byte2 <= byte3 -> this format
; Half-height: mask byte pairs (4,5), (8,9), (12,13), (16,17) are used,
; one pair per output row.
; Self-modifying: each mask byte indexes the dword table nfpk_mov4
; (defined elsewhere); its four bytes overwrite the byte at offset +1 of
; four `mov al/ah, bl` instructions (presumably the register-select
; byte, choosing bl/bh/cl/ch as the source).
; Out: esi advanced by 20; edi = entry edi + 3*nf_width - nfpk_back_right
mov eax, [esi]
cmp al, ah
ja nf41
shr eax, 16
cmp al, ah
ja nf25
xor eax, eax
lea ecx, nfpk_mov4 ; ecx -> patch table
lea edx, byte ptr ds:nf9_11+1 ; edx -> patched byte of first mov
mov al, [esi+4] ; row 0, left 4 pixels
mov ebx, [ecx+eax*4]
mov [edx+(nf9_11-nf9_11)], bl
mov [edx+(nf9_12-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_13-nf9_11)], bl
mov [edx+(nf9_14-nf9_11)], bh
mov al, [esi+5] ; row 0, right 4 pixels
mov ebx, [ecx+eax*4]
mov [edx+(nf9_15-nf9_11)], bl
mov [edx+(nf9_16-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_17-nf9_11)], bl
mov [edx+(nf9_18-nf9_11)], bh
mov al, [esi+8] ; row 1
mov ebx, [ecx+eax*4]
mov [edx+(nf9_31-nf9_11)], bl
mov [edx+(nf9_32-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_33-nf9_11)], bl
mov [edx+(nf9_34-nf9_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_35-nf9_11)], bl
mov [edx+(nf9_36-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_37-nf9_11)], bl
mov [edx+(nf9_38-nf9_11)], bh
lea edx, [edx+(nf9_51-nf9_11)] ; rebase edx on nf9_51
mov al, [esi+12] ; row 2
mov ebx, [ecx+eax*4]
mov [edx+(nf9_51-nf9_51)], bl
mov [edx+(nf9_52-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_53-nf9_51)], bl
mov [edx+(nf9_54-nf9_51)], bh
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_55-nf9_51)], bl
mov [edx+(nf9_56-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_57-nf9_51)], bl
mov [edx+(nf9_58-nf9_51)], bh
mov al, [esi+16] ; row 3
mov ebx, [ecx+eax*4]
mov [edx+(nf9_71-nf9_51)], bl
mov [edx+(nf9_72-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_73-nf9_51)], bl
mov [edx+(nf9_74-nf9_51)], bh
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_75-nf9_51)], bl
mov [edx+(nf9_76-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_77-nf9_51)], bl
mov [edx+(nf9_78-nf9_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width ; edx = row stride in bytes
jmp nf9_0 ; flush prefetch
ALIGN 4
nf9_0:
; Each patched mov supplies one pixel; eight of them make one row.
nf9_11: mov al, bl
nf9_12: mov ah, bl
shl eax, 16
nf9_13: mov al, bl
nf9_14: mov ah, bl
mov [edi], eax
nf9_15: mov al, bl
nf9_16: mov ah, bl
shl eax, 16
nf9_17: mov al, bl
nf9_18: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_31: mov al, bl
nf9_32: mov ah, bl
shl eax, 16
nf9_33: mov al, bl
nf9_34: mov ah, bl
mov [edi], eax
nf9_35: mov al, bl
nf9_36: mov ah, bl
shl eax, 16
nf9_37: mov al, bl
nf9_38: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_51: mov al, bl
nf9_52: mov ah, bl
shl eax, 16
nf9_53: mov al, bl
nf9_54: mov ah, bl
mov [edi], eax
nf9_55: mov al, bl
nf9_56: mov ah, bl
shl eax, 16
nf9_57: mov al, bl
nf9_58: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_71: mov al, bl
nf9_72: mov ah, bl
shl eax, 16
nf9_73: mov al, bl
nf9_74: mov ah, bl
mov [edi], eax
nf9_75: mov al, bl
nf9_76: mov ah, bl
shl eax, 16
nf9_77: mov al, bl
nf9_78: mov ah, bl
mov [edi+4], eax
add esi, 20
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
;nf9+16
nf25: ; low 4x4x2 (8 bytes)
; Reached from nf9 when byte0 <= byte1 and byte2 > byte3 (unsigned).
; Four colors plus 4 mask bytes. Each mask byte produces one 8-pixel
; output row made of four 2-pixel cells: every patched mov is followed
; by a fixed mov that duplicates the selected color into the
; neighbouring byte.
; Self-modifying: patches the byte at offset +1 of the labelled movs
; with bytes from nfpk_mov4. Labels are patched in the order 4,3,2,1
; because the instructions that build each dword appear in the opposite
; order to those in nf9.
; Out: esi advanced by 8; edi = entry edi + 3*nf_width - nfpk_back_right
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 8
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4 ; ecx -> patch table
lea edx, byte ptr ds:nf25_11+1 ; edx -> patched byte of first mov
mov al, [esi+4] ; row 0
mov ebx, [ecx+eax*4]
mov [edx+(nf25_14-nf25_11)], bl
mov [edx+(nf25_13-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_12-nf25_11)], bl
mov [edx+(nf25_11-nf25_11)], bh
mov al, [esi+5] ; row 1
mov ebx, [ecx+eax*4]
mov [edx+(nf25_24-nf25_11)], bl
mov [edx+(nf25_23-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_22-nf25_11)], bl
mov [edx+(nf25_21-nf25_11)], bh
mov al, [esi+6] ; row 2
mov ebx, [ecx+eax*4]
mov [edx+(nf25_34-nf25_11)], bl
mov [edx+(nf25_33-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_32-nf25_11)], bl
mov [edx+(nf25_31-nf25_11)], bh
mov al, [esi+7] ; row 3
mov ebx, [ecx+eax*4]
mov [edx+(nf25_44-nf25_11)], bl
mov [edx+(nf25_43-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_42-nf25_11)], bl
mov [edx+(nf25_41-nf25_11)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width ; edx = row stride in bytes
jmp nf25_0 ; flush prefetch
ALIGN 4
nf25_0:
nf25_11:mov ah, bl
mov al, ah
shl eax, 16
nf25_12:mov al, bl
mov ah, al
mov [edi], eax
nf25_13:mov ah, bl
mov al, ah
shl eax, 16
nf25_14:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf25_21:mov ah, bl
mov al, ah
shl eax, 16
nf25_22:mov al, bl
mov ah, al
mov [edi], eax
nf25_23:mov ah, bl
mov al, ah
shl eax, 16
nf25_24:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf25_31:mov ah, bl
mov al, ah
shl eax, 16
nf25_32:mov al, bl
mov ah, al
mov [edi], eax
nf25_33:mov ah, bl
mov al, ah
shl eax, 16
nf25_34:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf25_41:mov ah, bl
mov al, ah
shl eax, 16
nf25_42:mov al, bl
mov ah, al
mov [edi], eax
nf25_43:mov ah, bl
mov al, ah
shl eax, 16
nf25_44:mov al, bl
mov ah, al
mov [edi+4], eax
add esi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+32
nf41: ; low 4x8x2 (12 bytes)
; Reached from nf9 when byte0 > byte1 (unsigned), with eax still holding
; the dword at [esi]. If byte2 > byte3 as well, control goes to nf57.
; Four colors plus 8 mask bytes, each describing one row of four
; 2-pixel-wide cells. Half-height: mask bytes 4, 6, 8 and 10 are used,
; one per output row.
; Self-modifying: patches the byte at offset +1 of the labelled movs
; with bytes from nfpk_mov4 (labels patched in the order 4,3,2,1, as in
; nf25).
; Out: esi advanced by 12; edi = entry edi + 3*nf_width - nfpk_back_right
shr eax, 16 ; al,ah = color bytes 2,3
cmp al, ah
ja nf57
xor eax, eax
lea ecx, nfpk_mov4 ; ecx -> patch table
lea edx, byte ptr ds:nf41_11+1 ; edx -> patched byte of first mov
mov al, [esi+4] ; row 0
mov ebx, [ecx+eax*4]
mov [edx+(nf41_14-nf41_11)], bl
mov [edx+(nf41_13-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_12-nf41_11)], bl
mov [edx+(nf41_11-nf41_11)], bh
mov al, [esi+6] ; row 1
mov ebx, [ecx+eax*4]
mov [edx+(nf41_34-nf41_11)], bl
mov [edx+(nf41_33-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_32-nf41_11)], bl
mov [edx+(nf41_31-nf41_11)], bh
lea edx, [edx+(nf41_51-nf41_11)] ; rebase edx on nf41_51
mov al, [esi+8] ; row 2
mov ebx, [ecx+eax*4]
mov [edx+(nf41_54-nf41_51)], bl
mov [edx+(nf41_53-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_52-nf41_51)], bl
mov [edx+(nf41_51-nf41_51)], bh
mov al, [esi+10] ; row 3
mov ebx, [ecx+eax*4]
mov [edx+(nf41_74-nf41_51)], bl
mov [edx+(nf41_73-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_72-nf41_51)], bl
mov [edx+(nf41_71-nf41_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width ; edx = row stride in bytes
jmp nf41_0 ; flush prefetch
ALIGN 4
nf41_0:
nf41_11:mov ah, bl
mov al, ah
shl eax, 16
nf41_12:mov al, bl
mov ah, al
mov [edi], eax
nf41_13:mov ah, bl
mov al, ah
shl eax, 16
nf41_14:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_31:mov ah, bl
mov al, ah
shl eax, 16
nf41_32:mov al, bl
mov ah, al
mov [edi], eax
nf41_33:mov ah, bl
mov al, ah
shl eax, 16
nf41_34:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_51:mov ah, bl
mov al, ah
shl eax, 16
nf41_52:mov al, bl
mov ah, al
mov [edi], eax
nf41_53:mov ah, bl
mov al, ah
shl eax, 16
nf41_54:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_71:mov ah, bl
mov al, ah
shl eax, 16
nf41_72:mov al, bl
mov ah, al
mov [edi], eax
nf41_73:mov ah, bl
mov al, ah
shl eax, 16
nf41_74:mov al, bl
mov ah, al
mov [edi+4], eax
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+48
nf57: ; low 8x4x2 (12 bytes)
; Reached from nf41 when byte0 > byte1 and byte2 > byte3 (unsigned).
; Four colors plus 8 mask bytes, 2 per row of eight 1-pixel-wide cells.
; Here all eight mask bytes are used, two per output row (4 rows).
; Self-modifying: patches the byte at offset +1 of each `mov al/ah, bl`
; with bytes from nfpk_mov4.
; Out: esi advanced by 12; edi = entry edi + 3*nf_width - nfpk_back_right
xor eax, eax
lea ecx, nfpk_mov4 ; ecx -> patch table
lea edx, byte ptr ds:nf57_11+1 ; edx -> patched byte of first mov
mov al, [esi+4] ; row 0, left 4 pixels
mov ebx, [ecx+eax*4]
mov [edx+(nf57_11-nf57_11)], bl
mov [edx+(nf57_12-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_13-nf57_11)], bl
mov [edx+(nf57_14-nf57_11)], bh
mov al, [esi+5] ; row 0, right 4 pixels
mov ebx, [ecx+eax*4]
mov [edx+(nf57_15-nf57_11)], bl
mov [edx+(nf57_16-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_17-nf57_11)], bl
mov [edx+(nf57_18-nf57_11)], bh
mov al, [esi+6] ; row 1
mov ebx, [ecx+eax*4]
mov [edx+(nf57_21-nf57_11)], bl
mov [edx+(nf57_22-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_23-nf57_11)], bl
mov [edx+(nf57_24-nf57_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_25-nf57_11)], bl
mov [edx+(nf57_26-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_27-nf57_11)], bl
mov [edx+(nf57_28-nf57_11)], bh
mov al, [esi+8] ; row 2
mov ebx, [ecx+eax*4]
mov [edx+(nf57_31-nf57_11)], bl
mov [edx+(nf57_32-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_33-nf57_11)], bl
mov [edx+(nf57_34-nf57_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_35-nf57_11)], bl
mov [edx+(nf57_36-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_37-nf57_11)], bl
mov [edx+(nf57_38-nf57_11)], bh
mov al, [esi+10] ; row 3
mov ebx, [ecx+eax*4]
mov [edx+(nf57_41-nf57_11)], bl
mov [edx+(nf57_42-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_43-nf57_11)], bl
mov [edx+(nf57_44-nf57_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_45-nf57_11)], bl
mov [edx+(nf57_46-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_47-nf57_11)], bl
mov [edx+(nf57_48-nf57_11)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width ; edx = row stride in bytes
jmp nf57_0 ; flush prefetch
ALIGN 4
nf57_0:
nf57_11:mov al, bl
nf57_12:mov ah, bl
shl eax, 16
nf57_13:mov al, bl
nf57_14:mov ah, bl
mov [edi], eax
nf57_15:mov al, bl
nf57_16:mov ah, bl
shl eax, 16
nf57_17:mov al, bl
nf57_18:mov ah, bl
mov [edi+4], eax
add edi, edx
nf57_21:mov al, bl
nf57_22:mov ah, bl
shl eax, 16
nf57_23:mov al, bl
nf57_24:mov ah, bl
mov [edi], eax
nf57_25:mov al, bl
nf57_26:mov ah, bl
shl eax, 16
nf57_27:mov al, bl
nf57_28:mov ah, bl
mov [edi+4], eax
add edi, edx
nf57_31:mov al, bl
nf57_32:mov ah, bl
shl eax, 16
nf57_33:mov al, bl
nf57_34:mov ah, bl
mov [edi], eax
nf57_35:mov al, bl
nf57_36:mov ah, bl
shl eax, 16
nf57_37:mov al, bl
nf57_38:mov ah, bl
mov [edi+4], eax
add edi, edx
nf57_41:mov al, bl
nf57_42:mov ah, bl
shl eax, 16
nf57_43:mov al, bl
nf57_44:mov ah, bl
mov [edi], eax
nf57_45:mov al, bl
nf57_46:mov ah, bl
shl eax, 16
nf57_47:mov al, bl
nf57_48:mov ah, bl
mov [edi+4], eax
add esi, 12
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
nf10: ; 2x2 4x4x2 (32 bytes)
; Four quadrants, each with 4 colors and 4 mask bytes (one per 4-pixel
; row); quadrant data at esi+0, +8, +16, +24. This format is used when
; byte [esi] <= byte [esi+1] (unsigned); otherwise control goes to nf26.
; Half-height: only the first and third mask byte of each quadrant are
; used (offsets 4,6 / 12,14 / 20,22 / 28,30), two rows per quadrant.
; Quadrants are written in the order: left column top, left column
; bottom, right column top, right column bottom.
; Self-modifying: patches the byte at offset +1 of each `mov al/ah, bl`
; with bytes from nfpk_mov4.
; Out: esi advanced by 32; edi = entry edi + 3*nf_width - nfpk_back_right
mov ax, [esi]
cmp al, ah
ja nf26
xor eax, eax
lea ecx, nfpk_mov4 ; ecx -> patch table
lea edx, byte ptr ds:nf10_11+1 ; edx -> patched byte of first mov
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_11-nf10_11)], bl
mov [edx+(nf10_12-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_13-nf10_11)], bl
mov [edx+(nf10_14-nf10_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_21-nf10_11)], bl
mov [edx+(nf10_22-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_23-nf10_11)], bl
mov [edx+(nf10_24-nf10_11)], bh
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_31-nf10_11)], bl
mov [edx+(nf10_32-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_33-nf10_11)], bl
mov [edx+(nf10_34-nf10_11)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_41-nf10_11)], bl
mov [edx+(nf10_42-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_43-nf10_11)], bl
mov [edx+(nf10_44-nf10_11)], bh
lea edx, [edx+(nf10_51-nf10_11)] ; rebase edx on nf10_51
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_51-nf10_51)], bl
mov [edx+(nf10_52-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_53-nf10_51)], bl
mov [edx+(nf10_54-nf10_51)], bh
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_61-nf10_51)], bl
mov [edx+(nf10_62-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_63-nf10_51)], bl
mov [edx+(nf10_64-nf10_51)], bh
mov al, [esi+28]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_71-nf10_51)], bl
mov [edx+(nf10_72-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_73-nf10_51)], bl
mov [edx+(nf10_74-nf10_51)], bh
mov al, [esi+30]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_81-nf10_51)], bl
mov [edx+(nf10_82-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_83-nf10_51)], bl
mov [edx+(nf10_84-nf10_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width ; edx = row stride in bytes
jmp nf10_0 ; flush prefetch
ALIGN 4
nf10_0:
; --- first quadrant (left column, rows 0-1) ---
nf10_11:mov al, bl
nf10_12:mov ah, bl
shl eax, 16
nf10_13:mov al, bl
nf10_14:mov ah, bl
mov [edi], eax
add edi, edx
nf10_21:mov al, bl
nf10_22:mov ah, bl
shl eax, 16
nf10_23:mov al, bl
nf10_24:mov ah, bl
mov [edi], eax
add edi, edx
; --- second quadrant (left column, rows 2-3) ---
; Load bl,bh,cl,ch with four colors
mov bx, [esi+8]
mov cx, [esi+10]
nf10_31:mov al, bl
nf10_32:mov ah, bl
shl eax, 16
nf10_33:mov al, bl
nf10_34:mov ah, bl
mov [edi], eax
add edi, edx
nf10_41:mov al, bl
nf10_42:mov ah, bl
shl eax, 16
nf10_43:mov al, bl
nf10_44:mov ah, bl
mov [edi], eax
add edi, edx
; Back up four rows and step 4 bytes right to the top of the right column.
lea eax, [edx*4-4]
sub edi, eax
; --- third quadrant (right column, rows 0-1) ---
; Load bl,bh,cl,ch with four colors
mov bx, [esi+16]
mov cx, [esi+18]
nf10_51:mov al, bl
nf10_52:mov ah, bl
shl eax, 16
nf10_53:mov al, bl
nf10_54:mov ah, bl
mov [edi], eax
add edi, edx
nf10_61:mov al, bl
nf10_62:mov ah, bl
shl eax, 16
nf10_63:mov al, bl
nf10_64:mov ah, bl
mov [edi], eax
add edi, edx
; --- fourth quadrant (right column, rows 2-3) ---
; Load bl,bh,cl,ch with four colors
mov bx, [esi+24]
mov cx, [esi+26]
nf10_71:mov al, bl
nf10_72:mov ah, bl
shl eax, 16
nf10_73:mov al, bl
nf10_74:mov ah, bl
mov [edi], eax
add edi, edx
nf10_81:mov al, bl
nf10_82:mov ah, bl
shl eax, 16
nf10_83:mov al, bl
nf10_84:mov ah, bl
mov [edi], eax
add esi, 32
sub edi, 4 ; undo the right-column offset
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+16
nf26: ; 2x1 4x8x2 (24 bytes)
; Reached from nf10. Left and right halves each have 4 colors and 8 mask
; bytes, one per 4-pixel row (left data at esi+0, right data at esi+12).
; This format is used when byte [esi+12] <= byte [esi+13] (unsigned);
; otherwise control goes to nf42.
; Half-height: every other mask byte is used (offsets 4,6,8,10 for the
; left half and 16,18,20,22 for the right half), 4 rows per half.
; Self-modifying: patches the byte at offset +1 of each `mov al/ah, bl`
; with bytes from nfpk_mov4.
; Out: esi advanced by 24; edi = entry edi + 3*nf_width - nfpk_back_right
mov ax, [esi+12]
cmp al, ah
ja nf42
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 24
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4 ; ecx -> patch table
lea edx, byte ptr ds:nf26_11+1 ; edx -> patched byte of first mov
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_11-nf26_11)], bl
mov [edx+(nf26_12-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_13-nf26_11)], bl
mov [edx+(nf26_14-nf26_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_21-nf26_11)], bl
mov [edx+(nf26_22-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_23-nf26_11)], bl
mov [edx+(nf26_24-nf26_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_31-nf26_11)], bl
mov [edx+(nf26_32-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_33-nf26_11)], bl
mov [edx+(nf26_34-nf26_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_41-nf26_11)], bl
mov [edx+(nf26_42-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_43-nf26_11)], bl
mov [edx+(nf26_44-nf26_11)], bh
lea edx, [edx+(nf26_51-nf26_11)] ; rebase edx on nf26_51
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_51-nf26_51)], bl
mov [edx+(nf26_52-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_53-nf26_51)], bl
mov [edx+(nf26_54-nf26_51)], bh
mov al, [esi+18]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_61-nf26_51)], bl
mov [edx+(nf26_62-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_63-nf26_51)], bl
mov [edx+(nf26_64-nf26_51)], bh
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_71-nf26_51)], bl
mov [edx+(nf26_72-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_73-nf26_51)], bl
mov [edx+(nf26_74-nf26_51)], bh
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_81-nf26_51)], bl
mov [edx+(nf26_82-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_83-nf26_51)], bl
mov [edx+(nf26_84-nf26_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width ; edx = row stride in bytes
jmp nf26_0 ; flush prefetch
ALIGN 4
nf26_0:
; --- left half, rows 0-3 ---
nf26_11:mov al, bl
nf26_12:mov ah, bl
shl eax, 16
nf26_13:mov al, bl
nf26_14:mov ah, bl
mov [edi], eax
add edi, edx
nf26_21:mov al, bl
nf26_22:mov ah, bl
shl eax, 16
nf26_23:mov al, bl
nf26_24:mov ah, bl
mov [edi], eax
add edi, edx
nf26_31:mov al, bl
nf26_32:mov ah, bl
shl eax, 16
nf26_33:mov al, bl
nf26_34:mov ah, bl
mov [edi], eax
add edi, edx
nf26_41:mov al, bl
nf26_42:mov ah, bl
shl eax, 16
nf26_43:mov al, bl
nf26_44:mov ah, bl
mov [edi], eax
add edi, edx
; Back up four rows and step 4 bytes right to the top of the right half.
lea eax, [edx*4-4]
sub edi, eax
; --- right half, rows 0-3 ---
; Load bl,bh,cl,ch with four colors
mov bx, [esi+12]
mov cx, [esi+14]
nf26_51:mov al, bl
nf26_52:mov ah, bl
shl eax, 16
nf26_53:mov al, bl
nf26_54:mov ah, bl
mov [edi], eax
add edi, edx
nf26_61:mov al, bl
nf26_62:mov ah, bl
shl eax, 16
nf26_63:mov al, bl
nf26_64:mov ah, bl
mov [edi], eax
add edi, edx
nf26_71:mov al, bl
nf26_72:mov ah, bl
shl eax, 16
nf26_73:mov al, bl
nf26_74:mov ah, bl
mov [edi], eax
add edi, edx
nf26_81:mov al, bl
nf26_82:mov ah, bl
shl eax, 16
nf26_83:mov al, bl
nf26_84:mov ah, bl
mov [edi], eax
add esi, 24
sub edi, 4 ; undo the right-half offset
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+32
nf42: ; 1x2 8x4x2 (24 bytes)
; Reached from nf26 when byte [esi+12] > byte [esi+13] (unsigned).
; Top and bottom halves each have 4 colors and 8 mask bytes, 2 per
; 8-pixel row (top data at esi+0, bottom data at esi+12).
; Half-height: mask byte pairs (4,5) and (8,9) give the two top rows,
; (16,17) and (20,21) the two bottom rows.
; Self-modifying: patches the byte at offset +1 of each `mov al/ah, bl`
; with bytes from nfpk_mov4.
; Out: esi advanced by 24; edi = entry edi + 3*nf_width - nfpk_back_right
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 24
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4 ; ecx -> patch table
lea edx, byte ptr ds:nf42_11+1 ; edx -> patched byte of first mov
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_11-nf42_11)], bl
mov [edx+(nf42_12-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_13-nf42_11)], bl
mov [edx+(nf42_14-nf42_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_15-nf42_11)], bl
mov [edx+(nf42_16-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_17-nf42_11)], bl
mov [edx+(nf42_18-nf42_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_31-nf42_11)], bl
mov [edx+(nf42_32-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_33-nf42_11)], bl
mov [edx+(nf42_34-nf42_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_35-nf42_11)], bl
mov [edx+(nf42_36-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_37-nf42_11)], bl
mov [edx+(nf42_38-nf42_11)], bh
lea edx, [edx+(nf42_51-nf42_11)] ; rebase edx on nf42_51
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_51-nf42_51)], bl
mov [edx+(nf42_52-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_53-nf42_51)], bl
mov [edx+(nf42_54-nf42_51)], bh
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_55-nf42_51)], bl
mov [edx+(nf42_56-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_57-nf42_51)], bl
mov [edx+(nf42_58-nf42_51)], bh
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_71-nf42_51)], bl
mov [edx+(nf42_72-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_73-nf42_51)], bl
mov [edx+(nf42_74-nf42_51)], bh
mov al, [esi+21]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_75-nf42_51)], bl
mov [edx+(nf42_76-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_77-nf42_51)], bl
mov [edx+(nf42_78-nf42_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width ; edx = row stride in bytes
jmp nf42_0 ; flush prefetch
ALIGN 4
nf42_0:
; --- top half, rows 0-1 ---
nf42_11:mov al, bl
nf42_12:mov ah, bl
shl eax, 16
nf42_13:mov al, bl
nf42_14:mov ah, bl
mov [edi], eax
nf42_15:mov al, bl
nf42_16:mov ah, bl
shl eax, 16
nf42_17:mov al, bl
nf42_18:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_31:mov al, bl
nf42_32:mov ah, bl
shl eax, 16
nf42_33:mov al, bl
nf42_34:mov ah, bl
mov [edi], eax
nf42_35:mov al, bl
nf42_36:mov ah, bl
shl eax, 16
nf42_37:mov al, bl
nf42_38:mov ah, bl
mov [edi+4], eax
add edi, edx
; --- bottom half, rows 2-3 ---
; Load bl,bh,cl,ch with four colors
mov bx, [esi+12]
mov cx, [esi+14]
nf42_51:mov al, bl
nf42_52:mov ah, bl
shl eax, 16
nf42_53:mov al, bl
nf42_54:mov ah, bl
mov [edi], eax
nf42_55:mov al, bl
nf42_56:mov ah, bl
shl eax, 16
nf42_57:mov al, bl
nf42_58:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_71:mov al, bl
nf42_72:mov ah, bl
shl eax, 16
nf42_73:mov al, bl
nf42_74:mov ah, bl
mov [edi], eax
nf42_75:mov al, bl
nf42_76:mov ah, bl
shl eax, 16
nf42_77:mov al, bl
nf42_78:mov ah, bl
mov [edi+4], eax
add esi, 24
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
nf11: ; 8x8x8 (64 bytes)
; Raw block copy.  Eight bytes are taken from source offsets 0, 16, 32
; and 48 (every second 8-byte row of the 64-byte block) and written to
; four consecutive destination lines, nf_width bytes apart.
;
; In:   esi = source data, edi = destination pointer
; Out:  esi advanced by 64, edi adjusted by 3*nf_width - nfpk_back_right
; Clobbers: eax, edx, flags
if 0 ;debug
        add     esi, 64
        mov     eax, 0fefefefeH
;       mov     ebx, eax
        mov     ebx, 0
        jmp     nf_solid
endif
        mov     edx, nf_width           ; edx = destination line pitch

        ; source row 0 -> destination line 0
        mov     eax, [esi]
        mov     [edi], eax
        mov     eax, [esi+4]
        mov     [edi+4], eax

        ; source row 2 -> destination line 1
        mov     eax, [esi+16]
        mov     [edi+edx], eax
        mov     eax, [esi+20]
        mov     [edi+edx+4], eax

        lea     edi, [edi+edx*2]        ; step down two lines, flags untouched

        ; source row 4 -> destination line 2
        mov     eax, [esi+32]
        mov     [edi], eax
        mov     eax, [esi+36]
        mov     [edi+4], eax

        ; source row 6 -> destination line 3
        mov     eax, [esi+48]
        mov     [edi+edx], eax
        mov     eax, [esi+52]
        mov     [edi+edx+4], eax

        add     edi, edx                ; edi now sits on destination line 3
        add     esi, 64                 ; consume the whole block
        ; nfpk_back_right is set up outside this view; presumably it moves
        ; edi back up and across to the next block -- confirm.
        sub     edi, nfpk_back_right
        retn
;----------------------------------------
ALIGN 4
nf12: ; low 4x4x8 (16 bytes)
; Low-resolution block: every source byte is written twice side by side.
; Four source bytes make one 8-byte destination line; source offsets
; 0, 4, 8 and 12 feed four consecutive destination lines.
;
; In:   esi = source data, edi = destination pointer
; Out:  esi advanced by 16, edi adjusted by 3*nf_width - nfpk_back_right
; Clobbers: eax, ebx, edx, flags

;; Expand the four bytes at [esi+srcofs] into eight bytes at [edi].
;; Bytes 0/1 of the source dword form the first output dword, bytes 2/3
;; the second one.  Leaves eax = source dword >> 16, ebx = second dword.
NF12H_ROW MACRO srcofs
        mov     eax, [esi+srcofs]
        mov     bh, ah
        mov     bl, ah
        shl     ebx, 16                 ;; byte 1 doubled -> upper half
        mov     bh, al
        mov     bl, al                  ;; byte 0 doubled -> lower half
        mov     [edi], ebx
        shr     eax, 16                 ;; bring bytes 2/3 into al/ah
        mov     bh, ah
        mov     bl, ah
        shl     ebx, 16                 ;; byte 3 doubled -> upper half
        mov     bh, al
        mov     bl, al                  ;; byte 2 doubled -> lower half
        mov     [edi+4], ebx
ENDM

        mov     edx, nf_width           ; edx = destination line pitch

        NF12H_ROW 0
        add     edi, edx
        NF12H_ROW 4
        add     edi, edx
        NF12H_ROW 8
        add     edi, edx
        NF12H_ROW 12

        ; nfpk_back_right is set up outside this view; presumably it moves
        ; edi back up and across to the next block -- confirm.
        sub     edi, nfpk_back_right
        add     esi, 16                 ; consume the whole block
        retn
;----------------------------------------
ALIGN 4
nf13: ; 2x2 4x4x0 (4 bytes)
; Four solid quadrants.  Source bytes 0/1 colour the left/right halves of
; the first two destination lines, source bytes 2/3 the left/right halves
; of the last two destination lines.
;
; In:   esi = source data, edi = destination pointer
; Out:  esi advanced by 4, edi adjusted by 3*nf_width - nfpk_back_right
; Clobbers: eax, ebx, cx, edx, flags
        mov     edx, nf_width           ; edx = destination line pitch

        ; eax = source byte 0 replicated into all four bytes
        mov     cl, [esi]
        mov     ch, cl
        mov     ax, cx
        shl     eax, 16
        mov     ax, cx
        ; ebx = source byte 1 replicated into all four bytes
        mov     cl, [esi+1]
        mov     ch, cl
        mov     bx, cx
        shl     ebx, 16
        mov     bx, cx

        ; destination lines 0 and 1
        mov     [edi], eax
        mov     [edi+4], ebx
        add     edi, edx
        mov     [edi], eax
        mov     [edi+4], ebx
        add     edi, edx

        ; eax = source byte 2 replicated into all four bytes
        mov     cl, [esi+2]
        mov     ch, cl
        mov     ax, cx
        shl     eax, 16
        mov     ax, cx
        ; ebx = source byte 3 replicated into all four bytes
        mov     cl, [esi+3]
        mov     ch, cl
        mov     bx, cx
        shl     ebx, 16
        mov     bx, cx

        ; destination lines 2 and 3
        mov     [edi], eax
        mov     [edi+4], ebx
        mov     [edi+edx], eax
        mov     [edi+edx+4], ebx
        add     edi, edx                ; edi now sits on destination line 3

        ; nfpk_back_right is set up outside this view; presumably it moves
        ; edi back up and across to the next block -- confirm.
        sub     edi, nfpk_back_right
        add     esi, 4                  ; consume the whole block
        retn
;----------------------------------------
ALIGN 4
nf14: ; 8x8x0 (1 byte)
; Solid block: one source byte is replicated into all four bytes of both
; eax and ebx, then control transfers to nf_solid, which stores the two
; dwords to the destination lines and returns to the caller.
;
; In:   esi = source data, edi = destination pointer
; Out:  esi advanced by 1; eax = ebx = colour repeated four times
; Does not return here: nf_solid executes the retn.
        mov     bl, [esi]               ; Copy color into 8 positions
        inc     esi
        mov     bh, bl                  ; bx = colour, colour
        mov     eax, ebx
        shl     eax, 16                 ; discards whatever was in ebx's upper half
        mov     ax, bx                  ; eax = colour x 4
        mov     ebx, eax                ; both line patterns are identical
if 0 ;debug
        mov     eax, 080808080h
        mov     ebx, eax
endif
        ; Tail jump.  The retn that used to follow this jmp could never be
        ; reached and has been removed.
        jmp     nf_solid
;----------------------------------------
ALIGN 4
nf15: ; mix 8x8x0 (2 bytes)
; Two-colour checkerboard.  The source word is duplicated into eax, and
; ebx receives the same dword rotated by one byte so that alternate
; destination lines are offset by one pixel.  Falls through to nf_solid.
;
; In:   esi = source data, edi = destination pointer
; Out:  esi advanced by 2; remaining effects as for nf_solid
        mov     bx, [esi]               ; Copy 2 colors into 8 positions
        add     esi, 2                  ; in a checkerboard
        mov     ax, bx
        shl     eax, 16
        mov     ax, bx                  ; eax = source word repeated twice
        mov     ebx, eax
        rol     ebx, 8                  ; ebx = same pattern shifted one byte
if 0 ;debug
        mov     eax, 080808080h
        mov     ebx, eax
endif
; nf_solid: shared tail, also entered by jump from other decoders.
; In:   eax = dword for destination lines 0 and 2
;       ebx = dword for destination lines 1 and 3
;       edi = destination pointer
; Out:  edx = nf_width, edi adjusted by 3*nf_width - nfpk_back_right
; Each destination line receives the same dword twice (8 bytes).
nf_solid:
        mov     edx, nf_width           ; edx = destination line pitch

        ; lines 0 and 1
        mov     [edi], eax
        mov     [edi+4], eax
        mov     [edi+edx], ebx
        mov     [edi+edx+4], ebx

        lea     edi, [edi+edx*2]        ; step down two lines, flags untouched

        ; lines 2 and 3
        mov     [edi], eax
        mov     [edi+4], eax
        add     edi, edx
        mov     [edi], ebx
        mov     [edi+4], ebx

        ; nfpk_back_right is set up outside this view; presumably it moves
        ; edi back up and across to the next block -- confirm.
        sub     edi, nfpk_back_right
        retn
nfPkDecompH ENDP
; If at least 11 palette entries aren't changed, this is more compact
; than an uncompressed 256-entry palette.
;
;static void palLoadCompPalette(unsigned char *buf)
;
; Applies a compressed palette update to pal_tbl.
;
; buf is consumed as 32 groups.  Each group begins with one mask byte
; that covers 8 consecutive 3-byte palette entries; bit 0 belongs to the
; first entry of the group and bit 7 to the last.  For every set bit the
; next 3 bytes of buf are copied into that entry; for every clear bit the
; entry is skipped and keeps its current contents.  A mask of zero skips
; the whole group (24 bytes of pal_tbl) without reading further data.
;
; In:       buf = compressed palette data
; Out:      nothing
; Clobbers: eax, ecx, es, flags (esi/edi are preserved by USES)
; Assumes the direction flag is clear on entry; it is not changed here.
;
palLoadCompPalette PROC USES ESI EDI, \
        buf: PTRBYTE
        mov     ax, ds                  ; Ensure es==ds for symantec flat mode
        mov     es, ax                  ; (movs stores through es:edi)

        ; The loop instruction counts with the full ECX in a 32-bit
        ; segment, so the whole register must be initialised.  Loading
        ; only CX left the upper 16 bits at whatever the caller had there.
        mov     ecx, 32                 ; 32 groups * 8 entries = 256 entries
        mov     esi, buf
        mov     edi, offset pal_tbl

next:   lodsb                           ; al = mask for the next 8 entries
        or      al, al
        jnz     chk0
        add     edi, 24                 ; empty mask: skip 8 entries * 3 bytes
        loop    next
        jmp     done

        ; Copy chain: reached while the previous entry was copied.
chk0:   test    al, 1
        jz      not0
        movsw                           ; copy one 3-byte entry
        movsb
        test    al, 2
        jz      not1
cpy1:   movsw
        movsb
        test    al, 4
        jz      not2
cpy2:   movsw
        movsb
        test    al, 8
        jz      not3
cpy3:   movsw
        movsb
        test    al, 16
        jz      not4
cpy4:   movsw
        movsb
        test    al, 32
        jz      not5
cpy5:   movsw
        movsb
        test    al, 64
        jz      not6
cpy6:   movsw
        movsb
        or      al, al                  ; sign flag = bit 7 of the mask
        jns     not7
cpy7:   movsw
        movsb
        loop    next
        jmp     done

        ; Skip chain: reached while the previous entry was skipped.
not0:   add     edi, 3                  ; leave this entry unchanged
        test    al, 2
        jnz     cpy1
not1:   add     edi, 3
        test    al, 4
        jnz     cpy2
not2:   add     edi, 3
        test    al, 8
        jnz     cpy3
not3:   add     edi, 3
        test    al, 16
        jnz     cpy4
not4:   add     edi, 3
        test    al, 32
        jnz     cpy5
not5:   add     edi, 3
        test    al, 64
        jnz     cpy6
not6:   add     edi, 3
        or      al, al                  ; sign flag = bit 7 of the mask
        js      cpy7
not7:   add     edi, 3
        loop    next
done:   ret
palLoadCompPalette ENDP
EXTERN snd_8to16: WORD ; short snd_8to16[256];
;unsigned sndDecompM16(unsigned short *dst, const unsigned char *src,
;                      unsigned len, unsigned prev);
;
; Expands a delta-coded mono stream to 16-bit samples.
; Every source byte selects an entry of snd_8to16; that entry is added
; to the running 16-bit value and the sum is written as the next sample.
;
; In:   dst  = output buffer, receives len 16-bit samples (len*2 bytes)
;       src  = input buffer, len bytes
;       len  = number of samples; zero is allowed and writes nothing
;       prev = state returned by the previous call, or zero
; Out:  eax  = new state: low word is the last sample produced, upper
;              word is passed through unchanged from prev
; Clobbers: ecx, flags (esi/edi/ebx are preserved by USES)
;
sndDecompM16 PROC USES ESI EDI EBX, \
        dst:PTRWORD, src:PTRBYTE, len:DWORD, prev:DWORD
        mov     ecx, len                ; ecx = samples remaining
        mov     eax, prev               ; ax  = running sample value
        test    ecx, ecx
        jz      m16_exit                ; nothing to decode: return prev as is

        mov     edi, dst
        mov     esi, src

m16_next:
        movzx   ebx, byte ptr [esi]     ; ebx = table index 0..255
        inc     esi
        add     ax, word ptr snd_8to16[ebx*2]   ; 16-bit add, wraps silently
        mov     word ptr [edi], ax
        add     edi, 2
        dec     ecx
        jnz     m16_next

m16_exit:
        ret
sndDecompM16 ENDP