; mvelibwa.asm
;
; Interplay Movie (MVE) File Player Library (32-Bit Win95 Version)
; Assembly Language Components
; Written by Paul Allen Edelstein
;
; (c) 1997 Interplay Productions. All Rights Reserved.
; This file is confidential and consists of proprietary information
; of Interplay Productions. This file and associated libraries
; may not, in whole or in part, be disclosed to third parties,
; incorporated into any software product which is not being created
; for Interplay Productions, copied or duplicated in any form,
; without the prior written permission of Interplay Productions.
; Further, you may not reverse engineer, decompile or otherwise
; attempt to derive source code of this material.
;
; .386
.486 ; I only need .386, but I wanted the 486 cycle timings
ifdef SYMANTEC
.MODEL SMALL, C
DGROUP group _TEXT, _DATA
else
.MODEL FLAT, C
endif
;;--- Options ---
ONLYNEW equ 0 ; For debug, disables motion comp
LOGGING equ 0 ; Log timing statistics
PARTIAL equ 1 ; Support for partial updates
PKDATA equ 1 ; Support for packed data
HICOLOR equ 1 ; Support for HiColor
INTERP equ 0 ; Interpolated squares
; 0:none (4x4x8), 1:generic dither,
; 2:direction dither, 3:blend
COMPOPS equ 1 ; Compressed opcode table
SCALING equ 1 ; Scaling support
DECOMPD equ 0 ; Support for dithered half vert res
TRANS16 equ 1 ; Support for translating 16-bit rgb format
;;--- Types ---
PTRBYTE TYPEDEF PTR BYTE
PTRWORD TYPEDEF PTR WORD
PTRDWORD TYPEDEF PTR DWORD
PTRPROC TYPEDEF PTR PROC
;;--- Constants ---
; Width and height of sections in pixels.
SWIDTH equ 8
SHEIGHT equ 8
LOG2_SWIDTH equ 3
LOG2_SHEIGHT equ 3
;;---
EXTERN pal_tbl:BYTE ; unsigned char pal_tbl[3*256];
EXTERN pal15_tbl:WORD ; unsigned short pal15_tbl[256];
if INTERP eq 3
EXTERN blend_tbl: PTRDWORD ; unsigned *blend_tbl;
endif
.data
BYTE "(c) 1997 Interplay Productions. All Rights Reserved.\n"
BYTE "This file is confidential and consists of proprietary information\n"
BYTE "of Interplay Productions. This file and associated libraries\n"
BYTE "may not, in whole or in part, be disclosed to third parties,\n"
BYTE "incorporated into any software product which is not being created\n"
BYTE "for Interplay Productions, copied or duplicated in any form,\n"
BYTE "without the prior written permission of Interplay Productions.\n"
BYTE "Further, you may not reverse engineer, decompile or otherwise\n"
BYTE "attempt to derive source code of this material.\n",0
.code
PUBLIC mveliba_start, mveliba_end
mveliba_start:
;----------------------------------------------------------------------
; Logging Support
;-----------------
if LOGGING
;void logLabel(char *label)
;
logLabel PROTO lbl:PTRBYTE
LOG_LABEL MACRO msg
LOCAL lbl
.data
lbl BYTE msg,0
.code
INVOKE logLabel, offset lbl
ENDM
else
LOG_LABEL MACRO msg
ENDM
endif
;--------------------------------------------------------------------
; Sound Management
;--------------------
EXTERN snd_8to16: WORD ; short snd_8to16[256];
;unsigned sndDecompM16(unsigned short *dst, unsigned char *src,
; unsigned len, unsigned prev);
;
;Decompresses a mono stream containing len samples
;(src is len bytes, dst is len*2 bytes)
;prev is the previous decompression state or zero.
;Returns new decompression state.
;
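;A rough C sketch of the loop below (names illustrative; the running 16-bit
;state wraps naturally):
;
;   unsigned short state = (unsigned short)prev;
;   for (unsigned i = 0; i < len; ++i) {
;       state += snd_8to16[src[i]];   /* table holds signed 16-bit deltas */
;       dst[i] = state;
;   }
;   return state;
;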
sndDecompM16 PROC USES ESI EDI EBX, \
dst:PTRWORD, src:PTRBYTE, len:DWORD, prev:DWORD
mov eax, prev
mov ecx, len
jecxz done
mov esi, src
mov edi, dst
xor ebx, ebx
lp: mov bl, byte ptr [esi]
add esi, 1
add ax, word ptr snd_8to16[ebx*2]
mov word ptr [edi], ax
add edi, 2
dec ecx
jnz lp
done: ret
sndDecompM16 ENDP
;unsigned sndDecompS16(unsigned short *dst, unsigned char *src,
; unsigned len, unsigned prev);
;
;Decompresses a stereo stream containing len samples
;(src is len*2 bytes, dst is len*4 bytes)
;prev is the previous decompression state or zero
; (It encodes the 16-bit states of the two stereo channels
; in its low and high order 16-bit halves.)
;Returns new decompression state.
;
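;The two channel states are packed into the 32-bit prev/return value: the state
;for the first interleaved sample of each pair sits in the low 16 bits, the
;second in the high 16 bits, roughly:
;
;   return (unsigned short)state0 | ((unsigned)state1 << 16);
;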
sndDecompS16 PROC USES ESI EDI EBX, \
dst:PTRWORD, src:PTRBYTE, len:DWORD, prev:DWORD
movzx eax, word ptr prev
movzx edx, word ptr prev+2
mov ecx, len
jecxz done
mov esi, src
mov edi, dst
xor ebx, ebx
lp: mov bl, byte ptr [esi]
add esi, 1
add ax, word ptr snd_8to16[ebx*2]
mov word ptr [edi], ax
add edi, 2
mov bl, byte ptr [esi]
add esi, 1
add dx, word ptr snd_8to16[ebx*2]
mov word ptr [edi], dx
add edi, 2
dec ecx
jnz lp
done: shl edx, 16
or eax, edx
ret
sndDecompS16 ENDP
;--------------------------------------------------------------------
; NextFrame (Video Decompression)
;----------------------------------
;; NextFrame working storage
; MemRec nf_mem_buf1;
; MemRec nf_mem_buf2;
EXTERN nf_buf_cur: PTRBYTE ; unsigned char* nf_buf_cur;
EXTERN nf_buf_prv: PTRBYTE ; unsigned char* nf_buf_prv;
;; NextFrame parameters
EXTERN nf_wqty: BYTE ;unsigned char nf_wqty; // (width/SWIDTH)
EXTERN nf_hqty: BYTE ;unsigned char nf_hqty; // (height/SHEIGHT)
EXTERN nf_fqty: BYTE ;unsigned char nf_fqty; // Number of fields
if HICOLOR
EXTERN nf_hicolor: DWORD ;unsigned nf_hicolor; // HiColor (0:none,1:normal,2:swapped)
endif
;; <derived quantities>
EXTERN nf_width: DWORD ;unsigned nf_width; // wqty * SWIDTH
EXTERN nf_height: DWORD ;unsigned nf_height; // hqty * SHEIGHT;
EXTERN nf_new_line: DWORD ;unsigned nf_new_line; // width - SWIDTH
EXTERN nf_new_row0: DWORD ;unsigned nf_new_row0; // SHEIGHT*width*2-width
EXTERN nf_back_right: DWORD ;unsigned nf_back_right; // (SHEIGHT-1)*width
;; Frame parameters
;; Portion of current frame which has been updated
;; and needs to be sent to screen.
;;
EXTERN nf_new_x: DWORD ;unsigned nf_new_x;
EXTERN nf_new_y: DWORD ;unsigned nf_new_y;
EXTERN nf_new_w: DWORD ;unsigned nf_new_w;
EXTERN nf_new_h: DWORD ;unsigned nf_new_h;
NF_DECOMP_INIT MACRO HI_COLOR_FLAG: REQ
mov ax, ds ; Ensure es==ds for symantec flat mode
mov es, ax
mov eax, nf_buf_prv ; DiffBufPtrs = nf_buf_prv - nf_buf_cur
sub eax, nf_buf_cur
mov DiffBufPtrs, eax
xor ebx, ebx ; ebx = nf_fqty (convert to 32-bits)
mov bl, nf_fqty
mov eax, x ; nf_new_x = x*SWIDTH*2^HI_COLOR_FLAG;
shl eax, LOG2_SWIDTH+HI_COLOR_FLAG
mov nf_new_x, eax
mov eax, w ; nf_new_w = w*SWIDTH*2^HI_COLOR_FLAG;
shl eax, LOG2_SWIDTH+HI_COLOR_FLAG
mov nf_new_w, eax
mov eax, y ; nf_new_y = y*nf_fqty*SHEIGHT;
shl eax, LOG2_SHEIGHT
mul ebx ;nf_fqty
mov nf_new_y, eax
mov eax, h ; nf_new_h = h*nf_fqty*SHEIGHT;
shl eax, LOG2_SHEIGHT
mul ebx ;nf_fqty
mov nf_new_h, eax
mov eax, nf_new_row0 ; new_row = nf_new_row0 - nf_new_w;
sub eax, nf_new_w
mov new_row, eax
;; Move to correct place in current buffer
mov eax, nf_buf_cur ; tbuf = nf_buf_cur
mov tbuf, eax
.if x || y ; if (x||y)
mov eax, nf_new_y ; tbuf += nf_new_y*nf_width + nf_new_x;
mul nf_width
add eax, nf_new_x
add tbuf, eax
.endif
ENDM ; DECOMP_INIT
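;Roughly, NF_DECOMP_INIT computes (C sketch of the macro above):
;
;   DiffBufPtrs = nf_buf_prv - nf_buf_cur;
;   nf_new_x = (x * SWIDTH) << HI_COLOR_FLAG;   nf_new_w = (w * SWIDTH) << HI_COLOR_FLAG;
;   nf_new_y =  y * SHEIGHT * nf_fqty;          nf_new_h =  h * SHEIGHT * nf_fqty;
;   new_row  = nf_new_row0 - nf_new_w;
;   tbuf     = nf_buf_cur + ((x || y) ? nf_new_y * nf_width + nf_new_x : 0);
;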
DECOMP_BODY MACRO HI_COLOR_FLAG:REQ
LOCAL HI_COLOR_SCALE
HI_COLOR_SCALE equ HI_COLOR_FLAG+1
NF_DECOMP_INIT HI_COLOR_FLAG
mov eax, w ; parms_sz = (w*h*nf_fqty)<<1
mul h
mul ebx ;nf_fqty
shl eax, 1
mov parms_sz, eax
; esi indexes comp (to get new section data)
; edi indexes current screen buffer
; edx is a frequently used constant
; ebx indexes section params
mov edi, tbuf
mov edx, nf_new_line ; width - SWIDTH
mov ebx, comp ; Parms index
mov esi, ebx
add esi, parms_sz ; Skip over flags (w*h*2)
; Iterate over params and copy new hires data to appropriate sections.
mov cl, nf_fqty
ns_0f: push ecx
push edi
mov ch, byte ptr h
ns_0: mov cl, byte ptr w
ns_1: cmp word ptr [ebx],0
je ns_10
add edi, SWIDTH*HI_COLOR_SCALE
ns_2: add ebx, 2
dec cl
jnz ns_1
add edi, new_row ; SHEIGHT*width - SWIDTH*w
dec ch
jnz ns_0
pop edi
pop ecx
add edi, nf_width
dec cl
jnz ns_0f
jmp ns_99
; Copy new data to one section
; Enter with esi pointing to source data, edi to screen section.
; Assumes SWIDTH=8 (16-bit data) and SHEIGHT=8
ns_10:
REPEAT 7
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
add edi, edx
ENDM
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
sub edi, nf_back_right ; (SHEIGHT-1)*width
jmp ns_2
ns_99:
ife ONLYNEW ; if !ONLYNEW
; Iterate over flags and motion source addresses from params
; to determine which sections to move.
; ebx indexes params.
; esi indexes source from buffer
; esi will be computed as +- 16K relative to edi.
sub ebx, parms_sz ; Move back to start of section parms
mov edi, tbuf
mov cl, nf_fqty
xor esi, esi
ms_0f: push ecx
push edi
mov ch, byte ptr h
ms_0: mov cl, byte ptr w
ms_1: or si, [ebx]
jg ms_10
jl ms_j30
add edi, SWIDTH*HI_COLOR_SCALE
ms_2: add ebx, 2
dec cl
jnz ms_1
add edi, new_row ; SHEIGHT*width - SWIDTH*w
dec ch
jnz ms_0
pop edi
pop ecx
add edi, nf_width
dec cl
jnz ms_0f
jmp ms_99
ms_j30: jmp ms_30
; Move one section from current screen to current screen.
; Enter with
; edi pointing to destination screen section,
; relative value of source offset in esi.
; The following assumes SWIDTH==8 and SHEIGHT==8
ms_10: ; Make esi absolute
lea esi, [esi*HI_COLOR_SCALE-04000h*HI_COLOR_SCALE+edi]
REPEAT 7
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
add esi, edx
add edi, edx
ENDM
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
sub edi, nf_back_right ; (SHEIGHT-1)*width
xor esi, esi ; Reset esi to zero
jmp ms_2
ms_20f: push ecx
push edi
mov ch, byte ptr h
ms_20: mov cl, byte ptr w
ms_21: or si, [ebx]
jl ms_30
jg ms_j10
add edi, SWIDTH*HI_COLOR_SCALE
ms_22: add ebx, 2
dec cl
jnz ms_21
add edi, new_row ; SHEIGHT*width - SWIDTH*w
dec ch
jnz ms_20
pop edi
pop ecx
add edi, nf_width
dec cl
jnz ms_20f
jmp ms_99
ms_j10: jmp ms_10
; Move one section from previous screen to current screen.
; Enter with
; edi pointing to destination screen section,
; relative value of source offset in esi.
; The following assumes SWIDTH==8 and SHEIGHT==8
ms_30: ; Make esi absolute
lea esi, [esi*HI_COLOR_SCALE-0C000h*HI_COLOR_SCALE+edi]
add esi, DiffBufPtrs ; and point to other buffer
REPEAT 7
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
add esi, edx
add edi, edx
ENDM
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
sub edi, nf_back_right ; (SHEIGHT-1)*width
xor esi, esi ; Reset esi to zero
jmp ms_22
ms_99:
endif ; #endif !ONLYNEW
ENDM ; DECOMP_BODY
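;Per 8x8 section the flags word in the parm stream selects the source, roughly
;(sketch; HI_COLOR_SCALE is 1 for 8-bit, 2 for HiColor):
;
;   flag = *parm++;                               /* signed 16-bit */
;   if (flag == 0)      /* fresh pixels follow in the comp stream  */
;       copy SWIDTHxSHEIGHT pixels from the comp data;
;   else if (flag > 0)  /* motion within the current frame buffer  */
;       copy from dst + (flag - 0x4000) * HI_COLOR_SCALE;
;   else                /* motion from the previous frame buffer   */
;       copy from dst + ((unsigned short)flag - 0xC000) * HI_COLOR_SCALE + DiffBufPtrs;
;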
if PARTIAL
DECOMP_CHG_BODY MACRO HI_COLOR_FLAG:REQ
LOCAL HI_COLOR_SCALE
HI_COLOR_SCALE equ HI_COLOR_FLAG+1
NF_DECOMP_INIT HI_COLOR_FLAG
; esi indexes comp (to get new section data)
; edi indexes current screen buffer
; edx is a frequently used constant
; ebx indexes section params
mov edi, tbuf
mov edx, nf_new_line ; width - SWIDTH
mov esi, comp
mov ebx, parms
; Iterate over params and copy new hires data to appropriate sections.
mov eax, chgs
mov pChgs, eax
mov eax, 0
mov cl, nf_fqty
ns_0f: push ecx
push edi
mov ch, byte ptr h
ns_0: mov cl, byte ptr w
ns_1: add ax, ax
ja ns_1b
jz ns_5
cmp word ptr [ebx],0
je ns_10
add ebx, 2
ns_1b: add edi, SWIDTH*HI_COLOR_SCALE
ns_2: dec cl
jnz ns_1
add edi, new_row ; SHEIGHT*width - SWIDTH*w
dec ch
jnz ns_0
pop edi
pop ecx
add edi, nf_width
dec cl
jnz ns_0f
jmp ns_99
ns_5: mov eax, pChgs
add pChgs, 2
mov ax, [eax]
jmp ns_1
; Copy new data to one section
; Enter with esi pointing to source data, edi to screen section.
; Assumes SWIDTH=8 (16-bit data) and SHEIGHT=8
ns_10:
REPEAT 7
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
add edi, edx
ENDM
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
sub edi, nf_back_right ; (SHEIGHT-1)*width
add ebx, 2
jmp ns_2
ns_99:
ife ONLYNEW ; if !ONLYNEW
; Iterate over flags and motion source addresses from params
; to determine which sections to move.
; ebx indexes params.
; esi indexes source from buffer
; esi will be computed as +- 16K relative to edi.
mov edi, tbuf
mov ebx, parms
mov eax, chgs
mov pChgs, eax
mov eax, 0
mov cl, byte ptr nf_fqty
xor esi, esi
ms_0f: push ecx
push edi
mov ch, byte ptr h
ms_0: mov cl, byte ptr w
ms_1: add ax, ax
ja ms_1b
jz ms_5
or si, [ebx]
jg ms_10
jl ms_j30
add ebx, 2
ms_1b: add edi, SWIDTH*HI_COLOR_SCALE
ms_2: dec cl
jnz ms_1
add edi, new_row ; SHEIGHT*width - SWIDTH*w
dec ch
jnz ms_0
pop edi
pop ecx
add edi, nf_width
dec cl
jnz ms_0f
jmp ms_99
ms_5: mov eax, pChgs
add pChgs, 2
mov ax, word ptr [eax]
jmp ms_1
ms_j30: jmp ms_30
; Move one section from current screen to current screen.
; Enter with
; edi pointing to destination screen section,
; relative value of source offset in esi.
; The following assumes SWIDTH==8 and SHEIGHT==8
ms_10: ; Make esi absolute
lea esi, [esi*HI_COLOR_SCALE-04000h*HI_COLOR_SCALE+edi]
REPEAT 7
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
add esi, edx
add edi, edx
ENDM
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
sub edi, nf_back_right ; (SHEIGHT-1)*width
xor esi, esi ; Reset esi to zero
add ebx, 2
jmp ms_2
ms_20f: push ecx
push edi
mov ch, byte ptr h
ms_20: mov cl, byte ptr w
ms_21: add ax, ax
ja ms_21b
jz ms_25
or si, [ebx]
jl ms_30
jg ms_j10
add ebx, 2
ms_21b: add edi, SWIDTH*HI_COLOR_SCALE
ms_22: dec cl
jnz ms_21
add edi, new_row ; SHEIGHT*width - SWIDTH*w
dec ch
jnz ms_20
pop edi
pop ecx
add edi, nf_width
dec cl
jnz ms_20f
jmp ms_99
ms_25: mov eax, pChgs
add pChgs, 2
mov ax, [eax]
jmp ms_21
ms_j10: jmp ms_10
; Move one section from previous screen to current screen.
; Enter with
; edi pointing to destination screen section,
; relative value of source offset in esi.
; The following assumes SWIDTH==8 and SHEIGHT==8
ms_30: ; Make esi absolute
lea esi, [esi*HI_COLOR_SCALE-0C000h*HI_COLOR_SCALE+edi]
add esi, DiffBufPtrs ; and point to other buffer
REPEAT 7
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
add esi, edx
add edi, edx
ENDM
REPEAT 2*HI_COLOR_SCALE
movsd
ENDM
sub edi, nf_back_right ; (SHEIGHT-1)*width
add ebx, 2
xor esi, esi ; Reset esi to zero
jmp ms_22
ms_99:
endif ; !ONLYNEW
ENDM ; DECOMP_CHG_BODY
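;Roughly, DECOMP_CHG_BODY differs from DECOMP_BODY in that chgs supplies a
;bitmask, one bit per 8x8 section, consumed most-significant-bit first from
;each 16-bit word: a set bit means the section has a parms entry and is
;updated, a clear bit means the section is skipped without consuming a parms
;word.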
endif ; PARTIAL
;;--- HiColor versions
if HICOLOR
; Decompress into subsection of current buffer specified
; by x,y,w,h in units of SWIDTHxSHEIGHT (8x8).
;
;void
;nfHiColorDecomp(unsigned char *comp,
; unsigned x, unsigned y, unsigned w, unsigned h)
;
nfHiColorDecomp PROC USES ESI EDI EBX, \
comp:PTRBYTE, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD
LOCAL tbuf: PTRBYTE
LOCAL new_row: DWORD
LOCAL DiffBufPtrs: DWORD
LOCAL parms_sz: DWORD
LOG_LABEL "StartHiColorDecomp"
DECOMP_BODY 1 ; HiColor
LOG_LABEL "EndHiColorDecomp"
ret
nfHiColorDecomp ENDP
if PARTIAL
; Decompress into subsection of current buffer specified
; by x,y,w,h in units of SWIDTHxSHEIGHT (8x8).
; Chgs specifies which squares to update.
; Parms are motion parms for squares to update.
;
;void
;nfHiColorDecompChg(unsigned short *chgs,
; unsigned short *parms,
; unsigned char *comp,
; unsigned x, unsigned y, unsigned w, unsigned h)
;
nfHiColorDecompChg PROC USES ESI EDI EBX, \
chgs:PTRWORD, \
parms:PTRWORD, \
comp:PTRBYTE, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD
LOCAL tbuf: PTRBYTE
LOCAL new_row: DWORD
LOCAL DiffBufPtrs: DWORD
LOCAL pChgs: PTRBYTE
LOG_LABEL "StartHiColorDecompChg"
DECOMP_CHG_BODY 1 ; HiColor
LOG_LABEL "EndHiColorDecompChg"
ret
nfHiColorDecompChg ENDP
endif ; PARTIAL
endif ; HICOLOR
; Non-HiColor versions
; Decompress into subsection of current buffer specified
; by x,y,w,h in units of SWIDTHxSHEIGHT (8x8).
;
;void nfDecomp(unsigned char *comp,
; unsigned x, unsigned y, unsigned w, unsigned h)
;
nfDecomp PROC USES ESI EDI EBX, \
comp:PTRBYTE, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD
LOCAL tbuf: PTRBYTE
LOCAL new_row: DWORD
LOCAL DiffBufPtrs: DWORD
LOCAL parms_sz: DWORD
if HICOLOR
.if nf_hicolor
INVOKE nfHiColorDecomp, comp,x,y,w,h
ret
.endif
endif
LOG_LABEL "StartDecomp"
DECOMP_BODY 0 ; Not HiColor
LOG_LABEL "EndDecomp"
ret
nfDecomp ENDP
if PARTIAL
; Decompress into subsection of current buffer specified
; by x,y,w,h in units of SWIDTHxSHEIGHT (8x8).
; Chgs specifies which squares to update.
; Parms are motion parms for squares to update.
;
;void
;nfDecompChg(unsigned short *chgs,
; unsigned short *parms,
; unsigned char *comp,
; unsigned x, unsigned y, unsigned w, unsigned h)
;
nfDecompChg PROC USES ESI EDI EBX, \
chgs:PTRWORD, \
parms:PTRWORD, \
comp:PTRBYTE, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD
LOCAL tbuf: PTRBYTE
LOCAL new_row: DWORD
LOCAL DiffBufPtrs: DWORD
LOCAL pChgs: PTRBYTE
if HICOLOR
.if nf_hicolor
INVOKE nfHiColorDecompChg, chgs,parms,comp,x,y,w,h
ret
.endif
endif
LOG_LABEL "StartDecompChg"
DECOMP_CHG_BODY 0 ; Not HiColor
LOG_LABEL "EndDecompChg"
ret
nfDecompChg ENDP
endif ; PARTIAL
;----------------------------------------------------------------------
if PKDATA
.data
if (INTERP eq 1) or (INTERP eq 2) ; *** Old version for dithering ***
; luminance table for palette entries
lum_tbl DWORD 256 DUP (0)
endif
; signed 8-bit y * nf_width
nfpk_ShiftY DWORD 256 DUP (0)
; Constant tables
; 8-bit -8:7 x nf_width + -8:7
nfpk_ShiftP1 LABEL WORD
FOR y, <-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7>
FOR x, <-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7>
BYTE x,y
ENDM
ENDM
; 8-bit offsets to the right and below in roughly 0:14*nf_width + -14:14 (-3 cases);
; negated, the same entries give
; 8-bit offsets to the left and above in roughly -14:0*nf_width + -14:14 (-3 cases)
nfpk_ShiftP2 LABEL WORD
FOR y, <0,1,2,3,4,5,6,7>
FOR x, <8,9,10,11,12,13,14>
BYTE x,y
ENDM
ENDM
FOR y, <8,9,10,11,12,13>
FOR x, <-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1>
BYTE x,y
ENDM
FOR x, <0,1,2,3,4,5,6,7,8,9,10,11,12,13,14>
BYTE x,y
ENDM
ENDM
FOR x, <-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1>
BYTE x,14
ENDM
FOR x, <0,1,2,3,4,5,6,7,8,9,10,11>
BYTE x,14
ENDM
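; Each table entry maps one motion byte n to a signed (x,y) pixel pair, roughly:
;   ShiftP1:  x = (n & 0x0F) - 8;   y = (n >> 4) - 8;   /* +/-8 box, used by nf4 */
; ShiftP2 packs the larger, asymmetric displacements used by opcodes nf2/nf3.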
nfpk_mov4l LABEL DWORD
; mov ax, bx/cx
MOV4L_REGS TEXTEQU <!<0c0h+3,0c0h+1!>>
%FOR m4, MOV4L_REGS
% FOR m3, MOV4L_REGS
% FOR m2, MOV4L_REGS
% FOR m1, MOV4L_REGS
BYTE m2,m1,m4,m3
ENDM
ENDM
ENDM
ENDM
nfpk_mov8 LABEL DWORD
; mov ax, bx/dx/cx/bp
MOV8_REGS TEXTEQU <!<0c0h+3,0c0h+2,0c0h+1,0c0h+5!>>
%FOR m4, MOV8_REGS
% FOR m3, MOV8_REGS
% FOR m2, MOV8_REGS
% FOR m1, MOV8_REGS
BYTE m2,m1,m4,m3
ENDM
ENDM
ENDM
ENDM
nfpk_mov4 LABEL DWORD
; mov al, bl/bh/cl/ch
MOV4_REGS0 TEXTEQU <!<0c0h+3,0c0h+7,0c0h+1,0c0h+5!>>
; mov ah, bl/bh/cl/ch
MOV4_REGS1 TEXTEQU <!<0e0h+3,0e0h+7,0e0h+1,0e0h+5!>>
%FOR m4, MOV4_REGS1
% FOR m3, MOV4_REGS0
% FOR m2, MOV4_REGS1
% FOR m1, MOV4_REGS0
BYTE m3,m4,m1,m2
ENDM
ENDM
ENDM
ENDM
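; These dword tables hold ModRM bytes for self-modifying code: each pattern
; byte from the stream indexes four ModRM values that are patched into the
; "mov ax,<reg16>" / "mov al|ah,<reg8>" instructions of the nf7/nf8/nf9...
; handlers below (third byte of the 66h-prefixed 16-bit forms, second byte of
; the 8-bit forms), selecting which pre-loaded color register each store reads.
; For example, 0C0h+3 is the ModRM for "mov ax,bx" and 0E0h+3 for "mov ah,bl".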
.code
; nfPkConfig initializes tables used by nfPkDecomp
; which are dependent on screen size.
nfPkConfig PROC USES ESI EDI EBX
; Build ShiftY table
;
lea edi, nfpk_ShiftY
mov ebx, nf_width
mov eax, 0
mov ecx, 128
lp1: mov [edi], eax
add edi,4
add eax,ebx
dec ecx
jne lp1
mov eax, ebx
shl eax, 7
neg eax
mov ecx, 128
lp2: mov [edi], eax
add edi,4
add eax,ebx
dec ecx
jne lp2
ret
nfPkConfig ENDP
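; Roughly equivalent C:
;   for (i = 0; i < 128; ++i) nfpk_ShiftY[i]       = i * nf_width;          /* y =    0..127 */
;   for (i = 0; i < 128; ++i) nfpk_ShiftY[128 + i] = (i - 128) * nf_width;  /* y = -128..-1  */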
if (INTERP eq 1) or (INTERP eq 2)
; nfPkPal initializes tables used by nfPkDecomp
; which are dependent on palette.
nfPkPal PROC USES ESI EDI EBX
; Build palette luminance table
;
lea esi, pal_tbl
lea edi, lum_tbl
mov ecx, 256
lp3: xor eax, eax
xor ebx, ebx
xor edx, edx
mov al, [esi] ; r
mov bl, [esi+1] ; g
mov dl, [esi+2] ; b
add esi, 3
imul eax, 2990
imul ebx, 5866
imul edx, 1144
add eax, ebx
add eax, edx
mov [edi], eax
add edi, 4
dec ecx
jnz lp3
ret
nfPkPal ENDP
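; The weights 2990/5866/1144 are close to the Rec.601 luma coefficients
; (0.299, 0.587, 0.114) scaled by 10000, so lum_tbl[i] is roughly 10000*Y for
; palette entry i.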
elseif INTERP eq 3
nfPkInterp1 MACRO left:REQ, right:REQ
xor eax, eax
mov al, left
mov ah, right
mov eax, [esi+eax*4]
mov edx, eax
mov dl, dh
shl edx, 8
mov dl, left
mov [edi], edx
mov ah, right
ror eax, 16
mov [edi+4], eax
ENDM
nfPkInterp2 MACRO left:REQ, right:REQ
xor eax, eax
mov al, left
mov ah, right
mov eax, [esi+eax*4]
mov edx, eax
mov dl, dh
shl edx, 8
mov dl, left
mov [edi], edx
mov [edi+ebp*1], edx
mov ah, right
ror eax, 16
mov [edi+4], eax
mov [edi+4+ebp*1], eax
ENDM
endif
ifdef SYMANTEC
EXTERN _data_bottom:PTRBYTE
endif
; Normal version
;
nfPkDecomp PROC USES ESI EDI EBX, \
ops:PTRBYTE, comp:PTRBYTE, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD
LOCAL tbuf: PTRBYTE
LOCAL new_row:DWORD
LOCAL DiffBufPtrs:DWORD
LOCAL nfpk_back_right: DWORD
LOCAL wcnt:DWORD
LOG_LABEL "StartPkDecomp"
.data
nfpk_OpTbl label dword
dword offset nf0 ; Prev Same (0)
dword offset nf1 ; No change (and copied to screen) (0)
dword offset nf2 ; Near shift from older part of current buf (1)
dword offset nf3 ; Near shift from newer part of current buf (1)
dword offset nf4 ; Near shift from previous buffer (1)
dword offset nf5 ; Far shift from previous buffer (2)
dword offset nf6 ; Far shift from current buffer (2)
; [Or if COMPOPS, run of no changes (0)]
dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 8x4x2 (24 bytes)
dword offset nf11 ; 8x8x8 (64 bytes)
dword offset nf12 ; low 4x4x8 (16 bytes)
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
dword offset nf14 ; 8x8x0 (1 byte)
dword offset nf15 ; mix 8x8x0 (2 bytes)
.code
ifdef SYMANTEC
mov ebx, ds ; Allow DS to access code
mov ecx, 0
mov ax, 3505h
int 21h
endif
NF_DECOMP_INIT 0
mov eax, nf_back_right
sub eax, SWIDTH
mov nfpk_back_right, eax
mov esi, comp
mov edi, tbuf
nf_StartRow:
mov eax, w
shr eax, 1
mov wcnt,eax
ALIGN 4
nf_NextPair:
dec wcnt
js nf_NextRow
mov ebx, ops
mov al, [ebx]
inc ebx
mov ops, ebx
xor ebx, ebx
mov bl, al
shr bl, 4
and eax, 0Fh
push offset nf_NextPair
push nfpk_OpTbl[ebx*4]
jmp nfpk_OpTbl[eax*4]
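; Each stream byte packs two 4-bit opcodes.  The low-nibble handler is jumped
; to directly; the high-nibble handler and the address of nf_NextPair are
; pushed first, so each handler's retn chains into the next.  Roughly:
;   op = *ops++;  nfpk_OpTbl[op & 0x0F]();  nfpk_OpTbl[op >> 4]();  /* then next pair */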
nf_NextRow:
add edi, new_row
dec h
jnz nf_StartRow
LOG_LABEL "EndPkDecomp"
ifdef SYMANTEC
mov ebx, ds ; Disable DS from accessing code
mov ecx, offset DGROUP:_data_bottom[-1]
mov ax, 3505h
int 21h
endif
ret
;----------------------------------------
ALIGN 4
if INTERP eq 0
nf0: ; No change from previous buffer
mov eax, DiffBufPtrs
jmp nf_shift
elseif INTERP eq 3
nf0: ; Interpolated (1 byte)
push ebp
mov ebp, nf_width
sub edi, ebp ; Get four corner colors
mov bl, [edi-1] ; into bl,bh,cl,ch
mov bh, [edi+7]
mov cl, [edi+ebp*8-1]
mov ch, [esi]
inc esi
add edi, ebp
push esi
mov esi, blend_tbl
nfPkInterp1 bl,bh
add edi, ebp
push ebx
push ecx
xor eax, eax
mov al, bl
mov ah, cl
mov edx, [esi+eax*4]
mov al, bh
mov ah, ch
mov ecx, [esi+eax*4]
mov ebx, edx
nfPkInterp2 bh,ch
lea edi, [edi+ebp*2]
ror ebx, 16
ror ecx, 16
nfPkInterp2 bl,cl
lea edi, [edi+ebp*2]
nfPkInterp2 bh,ch
lea edi, [edi+ebp*2]
pop ecx
pop ebx
nfPkInterp1 cl,ch
pop esi
pop ebp
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
elseif INTERP eq 2
nf0: ; Interpolated (1 byte)
mov edx, nf_width
sub edi, edx ; Get four corner colors
sub edi, edx ;xxx
mov bl, [edi-1] ; into bl,bh,cl,ch
mov bh, [edi+7]
mov cl, [edi+edx*8-1]
mov ch, [esi]
inc esi
add edi, edx ;xxx
add edi, edx
; Get four luminances into eax, ebx, ebp, ecx
; Use edx for temp, esi for closest luminance, edi for closest pair
push ebx
push ecx
push esi
push edi
push ebp
xor edx, edx
mov dl, bl
mov eax, lum_tbl[edx*4]
mov dl, bh
mov ebx, lum_tbl[edx*4]
mov dl, cl
mov ebp, lum_tbl[edx*4]
mov dl, ch
mov ecx, lum_tbl[edx*4]
mov edx, eax
sub edx, ebx
jns nf0a
neg edx
nf0a: mov esi, edx
mov edi, 0 ; Vert
mov edx, eax
sub edx, ebp
jns nf0b
neg edx
nf0b: cmp edx, esi
ja nf0c
mov esi, edx
mov edi, 1 ; Horiz
nf0c: mov edx, eax
sub edx, ecx
jns nf0d
neg edx
nf0d: cmp edx, esi
ja nf0e
mov esi, edx
mov edi, 2 ; \ Diag
nf0e: mov edx, ebx
sub edx, ebp
jns nf0f
neg edx
nf0f: cmp edx, esi
ja nf0g
mov esi, edx ; / RDiag
mov edi, 3
nf0g:
mov edx, ebx
sub edx, ecx
jns nf0h
neg edx
nf0h: cmp edx, esi
ja nf0i
mov esi, edx
mov edi, 1 ; Horiz
nf0i: mov edx, ebp
sub edx, ecx
jns nf0j
neg edx
nf0j: cmp edx, esi
ja nf0k
mov edi, 0
nf0k: mov eax, edi
pop ebp
pop edi
pop esi
pop ecx
pop ebx
mov edx, nf_width
cmp eax, 2
jae nfdiag
or eax, eax
jz nf0_v
jmp nf0_h
nfdiag: jz nf0_d
jmp nf0_r
if 1 ; Newer versions of Vertical and Horizontal blend that use 0%,25%,50%,75%,100% instead of just 0%,50%,100%
; Vertical blend
; 0 1
; 01010101 1
; 00121013 2
; 02010311 3
; 20203131 4
; 02021313 5
; 23202331 6
; 20332123 7
;2 22233233 8
;
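; (In these grids each digit names the corner whose color that pixel takes:
;  0 = upper left, 1 = upper right, 2 = lower left, 3 = lower right.)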
nf0_v:
; 3412 (low to high)
;------
mov al, bl ; 0101 (1)
mov ah, bh
shl eax, 16
mov al, bl
mov ah, bh
mov [edi], eax
mov [edi+4], eax ; 0101
add edi, edx
mov al, bh ; 0012 (2)
mov ah, cl
shl eax, 8
mov al, bl
mov ah, bl
mov [edi], eax
mov al, bh ; 1013
mov ah, ch
shl eax, 16
mov al, bh
mov ah, bl
mov [edi+4], eax
add edi, edx
mov al, bl ; 0201 (3)
mov ah, bh
shl eax, 16
mov al, bl
mov ah, cl
mov [edi], eax
mov al, bh ; 0311
mov ah, bh
shl eax, 16
mov al, bl
mov ah, ch
mov [edi+4], eax
add edi, edx
mov al, cl ; 2020 (4), 0202 (5)
mov ah, bl
shl eax, 16
mov al, cl
mov ah, bl
mov [edi], eax
ror eax, 8
mov [edi+edx], eax
mov al, ch ; 3131, 1313
mov ah, bh
shl eax, 16
mov al, ch
mov ah, bh
mov [edi+4], eax
ror eax, 8
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
mov al, cl ; 2320 (6)
mov ah, bl
shl eax, 16
mov al, cl
mov ah, ch
mov [edi], eax
mov al, ch ; 2331
mov ah, bh
shl eax, 16
mov al, cl
mov ah, ch
mov [edi+4], eax
add edi, edx
rol eax, 8 ; 2033 (7)
mov al, cl
mov ah, bl
mov [edi], eax
mov al, cl ; 2123
mov ah, ch
shl eax, 16
mov al, cl
mov ah, bh
mov [edi+4], eax
add edi, edx
mov ah, cl ; 2223 (8)
mov [edi], eax
mov al, ch ; 3233
mov ah, ch
shl eax, 16
mov al, ch
mov ah, cl
mov [edi+4], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
; Horizontal blend
; 0 1
; 00010111 1
; 20101301 2
; 02010131 3
; 21201033 4
; 02032113 5
; 20323321 6
; 02232313 7
;2 23223233 8
nf0_h:
; 3412 (low to high)
;------
mov al, bl ; 0001 (1)
mov ah, bh
shl eax, 16
mov al, bl
mov ah, bl
mov [edi], eax
mov al, bh ; 0111
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
ror eax, 8 ; 2010 (2)
mov al, cl
mov ah, bl
mov [edi], eax
rol eax, 8
mov al, bh ; 1301
mov ah, ch
mov [edi+4], eax
add edi, edx
mov al, bl ; 0201 (3)
mov ah, cl
mov [edi], eax
mov al, ch ; 0131
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, cl ; 2120 (4)
mov ah, bl
shl eax, 16
mov al, cl
mov ah, bh
mov [edi], eax
mov al, ch ; 1033
mov ah, ch
shl eax, 16
mov al, bh
mov ah, bl
mov [edi+4], eax
add edi, edx
rol eax, 8 ; 0203 (5)
mov al, bl
mov ah, cl
mov [edi], eax
mov al, bh ; 2113
mov ah, ch
shl eax, 16
mov al, cl
mov ah, bh
mov [edi+4], eax
add edi, edx
ror eax, 8 ; 2032 (6)
mov al, cl
mov ah, bl
mov [edi], eax
mov al, bh ; 3321
mov ah, ch
ror eax, 8
mov [edi+4], eax
add edi, edx
mov al, cl ; 0223 (7)
mov ah, ch
shl eax, 16
mov al, bl
mov ah, cl
mov [edi], eax
mov al, bh ; 2313
mov ah, ch
rol eax, 16
mov [edi+4], eax
add edi, edx
shl eax, 16 ; 2322 (8)
mov al, cl
mov ah, ch
mov [edi], eax
mov al, ch ; 3233
mov ah, ch
shl eax, 16
mov al, ch
mov ah, cl
mov [edi+4], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
else
; Vertical blend
;0 1
; 00101011 1
; 00010111 2
; 20203131 3
; 02021313 4
; 20203131 5
; 02021313 6
; 22323233 7
;2 22232333 8
;
nf0_v:
push ebp
; 3412 (low to high)
;------
mov al, bh ; 0010 (1)
mov ah, bl
shl eax, 16
mov al, bl
mov ah, bl
mov [edi], eax
mov al, bh ; 1011
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
rol eax, 8 ; 0001 (2)
mov al, bl
mov ah, bl
mov [edi], eax
mov al, bh ; 0111
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, cl ; 2020 (3+5)
mov ah, bl
shl eax, 16
mov al, cl
mov ah, bl
mov ebp, eax
mov [edi], eax
mov [edi+edx*2], eax
mov al, ch ; 3131
mov ah, bh
shl eax, 16
mov al, ch
mov ah, bh
mov [edi+4], eax
mov [edi+edx*2+4], eax
add edi, edx
rol ebp, 8 ; 0202 (4+6)
mov [edi], ebp
mov [edi+edx*2], ebp
rol eax, 8 ; 1313
mov [edi+4], eax
mov [edi+edx*2+4], eax
add edi, edx
lea edi, [edi+edx*2]
mov al, ch ; 2232 (7)
mov ah, cl
shl eax, 16
mov al, cl
mov ah, cl
mov [edi], eax
mov al, ch ; 3233
mov ah, ch
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, cl ; 2223 (8)
mov ah, ch
shl eax, 16
mov al, cl
mov ah, cl
mov [edi], eax
mov al, ch ; 2333
mov ah, ch
rol eax, 16
mov [edi+4], eax
pop ebp
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
; Horizontal blend
;0 1
; 00101011 1
; 00010111 2
; 20101031 3
; 02010113 4
; 20323231 5
; 02232313 6
; 22323233 7
;2 22232333 8
;
nf0_h:
; 3412 (low to high)
;------
mov al, bh ; 0010 (1)
mov ah, bl
shl eax, 16
mov al, bl
mov ah, bl
mov [edi], eax
mov al, bh ; 1011
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
rol eax, 8 ; 0001 (2)
mov al, bl
mov ah, bl
mov [edi], eax
mov al, bh ; 0111
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
ror eax, 8 ; 2010 (3)
mov al, cl
mov ah, bl
mov [edi], eax
mov al, ch ; 1031
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, bl ; 0201 (4)
mov ah, bh
rol eax, 16
mov al, bl
mov ah, cl
mov [edi], eax
mov al, bh ; 0113
mov ah, ch
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, ch ; 2032 (5)
mov ah, cl
shl eax, 16
mov al, cl
mov ah, bl
mov [edi], eax
mov al, ch ; 3231
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
rol eax, 8 ; 0223 (6)
mov al, bl
mov ah, cl
mov [edi], eax
mov al, bh ; 2313
mov ah, ch
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, ch ; 2232 (7)
mov ah, cl
shl eax, 16
mov al, cl
mov ah, cl
mov [edi], eax
mov al, ch ; 3233
mov ah, ch
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, cl ; 2223 (8)
mov ah, ch
shl eax, 16
mov al, cl
mov ah, cl
mov [edi], eax
mov al, ch ; 2333
mov ah, ch
rol eax, 16
mov [edi+4], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
endif
; \ Diagonal blend
;0 1
; 00010101 1
; 00001313 2
; 20303101 3
; 02030313 4
; 23203031 5
; 02020333 6
; 23232333 7
;2 22023233 8
;
nf0_d:
; 3412 (low to high)
;------
mov al, bl ; 0001 (1)
mov ah, bh
shl eax, 16
mov al, bl
mov ah, bl
mov [edi], eax
mov ah, bh ; 0101
mov [edi+4], eax
add edi, edx
mov ah, bl ; 0000 (2)
rol eax, 16
mov ah, bl
mov [edi], eax
mov al, bh ; 1313
mov ah, ch
shl eax, 16
mov al, bh
mov ah, ch
mov [edi+4], eax
add edi, edx
mov al, ch ; 2030 (3)
mov ah, bl
shl eax, 16
mov al, cl
mov ah, bl
mov [edi], eax
mov al, bl ; 3101
mov ah, bh
shl eax, 16
mov al, ch
mov ah, bh
mov [edi+4], eax
add edi, edx
mov al, bl ; 0203 (4)
mov ah, ch
shl eax, 16
mov al, bl
mov ah, cl
mov [edi], eax
mov al, bh ; 0313
mov ah, ch
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, cl ; 2320 (5)
mov ah, bl
shl eax, 16
mov al, cl
mov ah, ch
mov [edi], eax
mov al, ch ; 3031
mov ah, bh
shl eax, 16
mov al, ch
mov ah, bl
mov [edi+4], eax
add edi, edx
mov al, bl ; 0202 (6)
mov ah, cl
shl eax, 16
mov al, bl
mov ah, cl
mov [edi], eax
mov ah, ch ; 0333
shl eax, 16
mov al, ch
mov ah, ch
mov [edi+4], eax
add edi, edx
mov al, cl ; 2323 (7)
rol eax, 16
mov al, cl
mov [edi], eax
mov al, ch ; 2333
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, bl ; 2202 (8)
mov ah, cl
shl eax, 16
mov al, cl
mov ah, cl
mov [edi], eax
mov al, ch ; 3233
mov ah, ch
shl eax, 16
mov al, ch
mov ah, cl
mov [edi+4], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
; / RDiagonal blend
;0 1
; 01010111 1
; 20201111 2
; 01021313 3
; 20212131 4
; 02121323 5
; 22213131 6
; 22232323 7
;2 22323133 8
;
nf0_r:
; 3412 (low to high)
;------
mov al, bl ; 0101 (1)
mov ah, bh
shl eax, 16
mov al, bl
mov ah, bh
mov [edi], eax
mov al, bh ; 0111
rol eax, 16
mov [edi+4], eax
add edi, edx
mov al, cl ; 2020 (2)
mov ah, bl
shl eax, 16
mov al, cl
mov ah, bl
mov [edi], eax
mov al, bh ; 1111
mov ah, bh
shl eax, 16
mov al, bh
mov ah, bh
mov [edi+4], eax
add edi, edx
mov al, bl ; 0102 (3)
mov ah, cl
rol eax, 16
mov al, bl
mov [edi], eax
mov al, bh ; 1313
mov ah, ch
shl eax, 16
mov al, bh
mov ah, ch
mov [edi+4], eax
add edi, edx
mov al, cl ; 2021 (4)
mov ah, bh
shl eax, 16
mov al, cl
mov ah, bl
mov [edi], eax
mov al, ch ; 2131
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
ror eax, 8 ; 0212 (5)
mov al, bl
mov ah, cl
mov [edi], eax
mov al, cl ; 1323
mov ah, ch
shl eax, 16
mov al, bh
mov ah, ch
mov [edi+4], eax
add edi, edx
mov al, cl ; 2221 (6)
mov ah, bh
shl eax, 16
mov al, cl
mov ah, cl
mov [edi], eax
mov al, ch ; 3131
mov ah, bh
shl eax, 16
mov al, ch
mov ah, bh
mov [edi+4], eax
add edi, edx
mov al, cl ; 2223 (7)
mov ah, ch
shl eax, 16
mov al, cl
mov ah, cl
mov [edi], eax
mov ah, ch ; 2323
rol eax, 16
mov [edi+4], eax
add edi, edx
rol eax, 8 ; 2232 (8)
mov al, cl
mov [edi], eax
mov al, ch ; 3133
mov ah, ch
shl eax, 16
mov al, ch
mov ah, bh
mov [edi+4], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
elseif INTERP eq 1
nf0: ; Interpolated (1 byte)
mov edx, nf_width
sub edi, edx ; Get four corner colors
sub edi, edx ;xxx
mov bl, [edi-1] ; into bl,bh,cl,ch
mov bh, [edi+7]
mov cl, [edi+edx*8-1]
mov ch, [esi]
inc esi
add edi, edx ;xxx
add edi, edx
; Pattern for interpolating four corners:
;0 1
; 00101011 1
; 00010111 2
; 20023113 3
; 02101031 4
; 20323213 5
; 02201331 6
; 22232333 7
;2 22323233 8
; 3412 (low to high)
;------
nf0_1:
mov al, bh ; 0010
mov ah, bl
shl eax, 16
mov al, bl
mov ah, bl
mov [edi], eax
mov al, bh ; 1011
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
nf0_2: rol eax, 8 ; 0001
mov al, bl
mov ah, bl
mov [edi], eax
mov al, bh ; 0111
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
nf0_3: mov al, bl ; 2002
mov ah, cl
shl eax, 16
mov al, cl
mov ah, bl
mov [edi], eax
mov al, bh ; 3113
mov ah, ch
shl eax, 16
mov al, ch
mov ah, bh
mov [edi+4], eax
add edi, edx
nf0_4: mov al, bh ; 0210
mov ah, bl
shl eax, 16
mov al, bl
mov ah, cl
mov [edi], eax
mov al, ch ; 1031
mov ah, bh
rol eax, 16
mov [edi+4], eax
add edi, edx
nf0_5: mov al, cl ; 2032
mov ah, ch
shl eax, 16
mov al, cl
mov ah, bl
mov [edi], eax
mov al, bh ; 3213
mov ah, ch
rol eax, 16
mov [edi+4], eax
add edi, edx
nf0_6: mov al, cl ; 0220
mov ah, bl
shl eax, 16
mov al, bl
mov ah, cl
mov [edi], eax
mov al, ch ; 1331
mov ah, bh
shl eax, 16
mov al, bh
mov ah, ch
mov [edi+4], eax
add edi, edx
nf0_7: mov al, cl ; 2223
mov ah, ch
shl eax, 16
mov al, cl
mov ah, cl
mov [edi], eax
mov al, ch ; 2333
mov ah, ch
rol eax, 16
mov [edi+4], eax
add edi, edx
nf0_8: ror eax, 8 ; 2232
mov al, cl
mov ah, cl
mov [edi], eax
mov al, ch ; 3233
mov ah, ch
rol eax, 16
mov [edi+4], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
endif
;----------------------------------------
ALIGN 4
nf1: ; No change (and copied to screen)
add edi, SWIDTH
retn
;----------------------------------------
ALIGN 4
nf2: ; Near shift from older part of current buffer
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP2[eax*2]
nf_xyc_shift:
xor ebx, ebx
mov bl, ah
shl eax, 24
sar eax, 24
add eax, nfpk_ShiftY[ebx*4]
jmp nf_shift
;----------------------------------------
ALIGN 4
nf3: ; Near shift from newer part of current buffer
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP2[eax*2]
neg al
neg ah
jmp nf_xyc_shift
;----------------------------------------
ALIGN 4
nf4: ; Near shift from previous buffer
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP1[eax*2]
jmp nf_xyp_shift
;----------------------------------------
ALIGN 4
nf5: ; Far shift from previous buffer
mov ax, [esi]
add esi, 2
nf_xyp_shift:
xor ebx, ebx
mov bl, ah
shl eax, 24
sar eax, 24
add eax, nfpk_ShiftY[ebx*4]
add eax, DiffBufPtrs
jmp nf_shift
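; Shift decode common to nf2..nf5 (and nf6 when COMPOPS is off): the parameter
; supplies signed x in its low byte and y in its high byte; the source offset
; is x + nfpk_ShiftY[y] (i.e. x + y*nf_width), plus DiffBufPtrs when the
; source is the previous frame buffer, and nf_shift then copies one 8x8 block
; from edi+offset to edi.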
;----------------------------------------
ALIGN 4
if COMPOPS
nf6: ; Run of no changes (must only appear in first nibble opcodes)
; Next nibble k specifies 2k+4 squares with no changes
add esp, 4 ; Next nibble is not an opcode
add ebx, 2 ; (minimum of 4 squares)
ALIGN 4
nf6a: add edi, SWIDTH*2 ; Advance over two squares
dec ebx
jz nf6z ; Last pair of squares
dec wcnt ; Same row?
jns nf6a ; Yes
add edi, new_row ; Advance to next row
dec h ; Decrement row count (should never become zero here)
mov eax, w ; Reset wcnt
shr eax, 1
dec eax
mov wcnt, eax
jmp nf6a
nf6z: retn
else
nf6: ; Far shift from current buffer
mov ax, [esi]
add esi, 2
jmp nf_xyc_shift
endif
;----------------------------------------
ALIGN 4
nf_shift:
if 0 ;debug
mov eax, 0
mov ebx, eax
jmp nf_solid
endif
mov ebx, esi ; save esi
lea esi, [edi+eax]
mov edx, nf_width
REPEAT 7
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
add esi, edx
add edi, edx
ENDM
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
mov esi, ebx ; restore esi
retn
;----------------------------------------
ALIGN 4
nf7: ; 8x8x1 (10 bytes)
mov ax, [esi]
cmp al, ah
ja nf23
if 0 ;debug
add esi, 10
mov eax, 0fefefefeH
mov ebx, eax
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf7_11+2
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_11-nf7_11)], bl
mov [edx+(nf7_12-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_13-nf7_11)], bl
mov [edx+(nf7_14-nf7_11)], bh
mov al, [esi+3]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_21-nf7_11)], bl
mov [edx+(nf7_22-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_23-nf7_11)], bl
mov [edx+(nf7_24-nf7_11)], bh
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_31-nf7_11)], bl
mov [edx+(nf7_32-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_33-nf7_11)], bl
mov [edx+(nf7_34-nf7_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_41-nf7_11)], bl
mov [edx+(nf7_42-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_43-nf7_11)], bl
mov [edx+(nf7_44-nf7_11)], bh
lea edx, [edx+(nf7_51-nf7_11)]
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_51-nf7_51)], bl
mov [edx+(nf7_52-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_53-nf7_51)], bl
mov [edx+(nf7_54-nf7_51)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_61-nf7_51)], bl
mov [edx+(nf7_62-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_63-nf7_51)], bl
mov [edx+(nf7_64-nf7_51)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_71-nf7_51)], bl
mov [edx+(nf7_72-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_73-nf7_51)], bl
mov [edx+(nf7_74-nf7_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_81-nf7_51)], bl
mov [edx+(nf7_82-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_83-nf7_51)], bl
mov [edx+(nf7_84-nf7_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi,nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf7_0 ; flush prefetch
ALIGN 4
nf7_0:
nf7_11: mov ax, bx
shl eax, 16
nf7_12: mov ax, bx
mov [edi], eax
nf7_13: mov ax, bx
shl eax, 16
nf7_14: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_21: mov ax, bx
shl eax, 16
nf7_22: mov ax, bx
mov [edi], eax
nf7_23: mov ax, bx
shl eax, 16
nf7_24: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_31: mov ax, bx
shl eax, 16
nf7_32: mov ax, bx
mov [edi], eax
nf7_33: mov ax, bx
shl eax, 16
nf7_34: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_41: mov ax, bx
shl eax, 16
nf7_42: mov ax, bx
mov [edi], eax
nf7_43: mov ax, bx
shl eax, 16
nf7_44: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_51: mov ax, bx
shl eax, 16
nf7_52: mov ax, bx
mov [edi], eax
nf7_53: mov ax, bx
shl eax, 16
nf7_54: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_61: mov ax, bx
shl eax, 16
nf7_62: mov ax, bx
mov [edi], eax
nf7_63: mov ax, bx
shl eax, 16
nf7_64: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_71: mov ax, bx
shl eax, 16
nf7_72: mov ax, bx
mov [edi], eax
nf7_73: mov ax, bx
shl eax, 16
nf7_74: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_81: mov ax, bx
shl eax, 16
nf7_82: mov ax, bx
mov [edi], eax
nf7_83: mov ax, bx
shl eax, 16
nf7_84: mov ax, bx
mov [edi+4], eax
pop esi
pop ebp
add esi, 10
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
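; nf7 data layout (10 bytes): two colors followed by eight mask bytes, one per
; row, one bit per pixel read least significant bit first.  The mask bytes are
; decoded once into the ModRM bytes of the "mov ax,<reg>" pairs above
; (self-modifying code); the patched instruction stream then writes the rows.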
;----------------------------------------
ALIGN 4
;nf7+16
nf23: ; low 4x4x1 (4 bytes)
xor eax, eax
lea ecx, nfpk_mov4l
lea edx, byte ptr ds:nf23_11+2
mov al, [esi+2]
and al, 0fH
mov ebx, [ecx+eax*4]
mov [edx+(nf23_11-nf23_11)], bl
mov [edx+(nf23_12-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_13-nf23_11)], bl
mov [edx+(nf23_14-nf23_11)], bh
mov al, [esi+2]
shr al, 4
mov ebx, [ecx+eax*4]
mov [edx+(nf23_31-nf23_11)], bl
mov [edx+(nf23_32-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_33-nf23_11)], bl
mov [edx+(nf23_34-nf23_11)], bh
mov al, [esi+3]
and al, 0fH
mov ebx, [ecx+eax*4]
mov [edx+(nf23_51-nf23_11)], bl
mov [edx+(nf23_52-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_53-nf23_11)], bl
mov [edx+(nf23_54-nf23_11)], bh
mov al, [esi+3]
shr al, 4
mov ebx, [ecx+eax*4]
mov [edx+(nf23_71-nf23_11)], bl
mov [edx+(nf23_72-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_73-nf23_11)], bl
mov [edx+(nf23_74-nf23_11)], bh
mov edx, nf_width
; load bx,cx with 00,11 color combinations
mov bx, [esi]
mov cl, bh
mov bh, bl
mov ch, cl
jmp nf23_0 ; flush prefetch
ALIGN 4
nf23_0:
nf23_11:mov ax, bx
shl eax, 16
nf23_12:mov ax, bx
mov [edi], eax
mov [edi+edx], eax
nf23_13:mov ax, bx
shl eax, 16
nf23_14:mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
nf23_31:mov ax, bx
shl eax, 16
nf23_32:mov ax, bx
mov [edi], eax
mov [edi+edx], eax
nf23_33:mov ax, bx
shl eax, 16
nf23_34:mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
nf23_51:mov ax, bx
shl eax, 16
nf23_52:mov ax, bx
mov [edi], eax
mov [edi+edx], eax
nf23_53:mov ax, bx
shl eax, 16
nf23_54:mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
nf23_71:mov ax, bx
shl eax, 16
nf23_72:mov ax, bx
mov [edi], eax
mov [edi+edx], eax
nf23_73:mov ax, bx
shl eax, 16
nf23_74:mov ax, bx
mov [edi+4], eax
add edi, edx
mov [edi+4], eax
sub edi, nfpk_back_right
add esi, 4
retn
;----------------------------------------
ALIGN 4
nf8: ; 2x2 4x4x1 (16 bytes)
mov ax, [esi]
cmp al, ah
ja nf24
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf8_11+2
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_11-nf8_11)], bl
mov [edx+(nf8_12-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_13-nf8_11)], bl
mov [edx+(nf8_14-nf8_11)], bh
mov al, [esi+3]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_21-nf8_11)], bl
mov [edx+(nf8_22-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_23-nf8_11)], bl
mov [edx+(nf8_24-nf8_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_31-nf8_11)], bl
mov [edx+(nf8_32-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_33-nf8_11)], bl
mov [edx+(nf8_34-nf8_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_41-nf8_11)], bl
mov [edx+(nf8_42-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_43-nf8_11)], bl
mov [edx+(nf8_44-nf8_11)], bh
add edx, nf8_51-nf8_11
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_51-nf8_51)], bl
mov [edx+(nf8_52-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_53-nf8_51)], bl
mov [edx+(nf8_54-nf8_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_61-nf8_51)], bl
mov [edx+(nf8_62-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_63-nf8_51)], bl
mov [edx+(nf8_64-nf8_51)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_71-nf8_51)], bl
mov [edx+(nf8_72-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_73-nf8_51)], bl
mov [edx+(nf8_74-nf8_51)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_81-nf8_51)], bl
mov [edx+(nf8_82-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_83-nf8_51)], bl
mov [edx+(nf8_84-nf8_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf8_0 ; flush prefetch
ALIGN 4
nf8_0:
nf8_11: mov ax, bx
shl eax, 16
nf8_12: mov ax, bx
mov [edi], eax
add edi, esi
nf8_13: mov ax, bx
shl eax, 16
nf8_14: mov ax, bx
mov [edi], eax
add edi, esi
nf8_21: mov ax, bx
shl eax, 16
nf8_22: mov ax, bx
mov [edi], eax
add edi, esi
nf8_23: mov ax, bx
shl eax, 16
nf8_24: mov ax, bx
mov [edi], eax
add edi, esi
mov eax, [esp]
mov cx, [eax+4]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf8_31: mov ax, bx
shl eax, 16
nf8_32: mov ax, bx
mov [edi], eax
add edi, esi
nf8_33: mov ax, bx
shl eax, 16
nf8_34: mov ax, bx
mov [edi], eax
add edi, esi
nf8_41: mov ax, bx
shl eax, 16
nf8_42: mov ax, bx
mov [edi], eax
add edi, esi
nf8_43: mov ax, bx
shl eax, 16
nf8_44: mov ax, bx
mov [edi], eax
add edi, esi
lea eax, [esi*8-4]
sub edi, eax
mov eax, [esp]
mov cx, [eax+8]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf8_51: mov ax, bx
shl eax, 16
nf8_52: mov ax, bx
mov [edi], eax
add edi, esi
nf8_53: mov ax, bx
shl eax, 16
nf8_54: mov ax, bx
mov [edi], eax
add edi, esi
nf8_61: mov ax, bx
shl eax, 16
nf8_62: mov ax, bx
mov [edi], eax
add edi, esi
nf8_63: mov ax, bx
shl eax, 16
nf8_64: mov ax, bx
mov [edi], eax
add edi, esi
mov eax, [esp]
mov cx, [eax+12]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf8_71: mov ax, bx
shl eax, 16
nf8_72: mov ax, bx
mov [edi], eax
add edi, esi
nf8_73: mov ax, bx
shl eax, 16
nf8_74: mov ax, bx
mov [edi], eax
add edi, esi
nf8_81: mov ax, bx
shl eax, 16
nf8_82: mov ax, bx
mov [edi], eax
add edi, esi
nf8_83: mov ax, bx
shl eax, 16
nf8_84: mov ax, bx
mov [edi], eax
pop esi
pop ebp
add esi, 16
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+16
nf24: ; 2x1 4x8x1 (12 bytes)
mov ax, [esi+6]
cmp al, ah
ja nf40
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf24_11+2
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_11-nf24_11)], bl
mov [edx+(nf24_12-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_13-nf24_11)], bl
mov [edx+(nf24_14-nf24_11)], bh
mov al, [esi+3]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_21-nf24_11)], bl
mov [edx+(nf24_22-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_23-nf24_11)], bl
mov [edx+(nf24_24-nf24_11)], bh
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_31-nf24_11)], bl
mov [edx+(nf24_32-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_33-nf24_11)], bl
mov [edx+(nf24_34-nf24_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_41-nf24_11)], bl
mov [edx+(nf24_42-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_43-nf24_11)], bl
mov [edx+(nf24_44-nf24_11)], bh
add edx, nf24_51-nf24_11
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_51-nf24_51)], bl
mov [edx+(nf24_52-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_53-nf24_51)], bl
mov [edx+(nf24_54-nf24_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_61-nf24_51)], bl
mov [edx+(nf24_62-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_63-nf24_51)], bl
mov [edx+(nf24_64-nf24_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_71-nf24_51)], bl
mov [edx+(nf24_72-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_73-nf24_51)], bl
mov [edx+(nf24_74-nf24_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_81-nf24_51)], bl
mov [edx+(nf24_82-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_83-nf24_51)], bl
mov [edx+(nf24_84-nf24_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf24_0 ; flush prefetch
ALIGN 4
nf24_0:
nf24_11:mov ax, bx
shl eax, 16
nf24_12:mov ax, bx
mov [edi], eax
add edi, esi
nf24_13:mov ax, bx
shl eax, 16
nf24_14:mov ax, bx
mov [edi], eax
add edi, esi
nf24_21:mov ax, bx
shl eax, 16
nf24_22:mov ax, bx
mov [edi], eax
add edi, esi
nf24_23:mov ax, bx
shl eax, 16
nf24_24:mov ax, bx
mov [edi], eax
add edi, esi
nf24_31:mov ax, bx
shl eax, 16
nf24_32:mov ax, bx
mov [edi], eax
add edi, esi
nf24_33:mov ax, bx
shl eax, 16
nf24_34:mov ax, bx
mov [edi], eax
add edi, esi
nf24_41:mov ax, bx
shl eax, 16
nf24_42:mov ax, bx
mov [edi], eax
add edi, esi
nf24_43:mov ax, bx
shl eax, 16
nf24_44:mov ax, bx
mov [edi], eax
add edi, esi
lea eax, [esi*8-4]
sub edi, eax
mov eax, [esp]
mov cx, [eax+6]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf24_51:mov ax, bx
shl eax, 16
nf24_52:mov ax, bx
mov [edi], eax
add edi, esi
nf24_53:mov ax, bx
shl eax, 16
nf24_54:mov ax, bx
mov [edi], eax
add edi, esi
nf24_61:mov ax, bx
shl eax, 16
nf24_62:mov ax, bx
mov [edi], eax
add edi, esi
nf24_63:mov ax, bx
shl eax, 16
nf24_64:mov ax, bx
mov [edi], eax
add edi, esi
nf24_71:mov ax, bx
shl eax, 16
nf24_72:mov ax, bx
mov [edi], eax
add edi, esi
nf24_73:mov ax, bx
shl eax, 16
nf24_74:mov ax, bx
mov [edi], eax
add edi, esi
nf24_81:mov ax, bx
shl eax, 16
nf24_82:mov ax, bx
mov [edi], eax
add edi, esi
nf24_83:mov ax, bx
shl eax, 16
nf24_84:mov ax, bx
mov [edi], eax
pop esi
pop ebp
add esi, 12
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+32
nf40: ; 1x2 8x4x1 (12 bytes)
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf40_11+2
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_11-nf40_11)], bl
mov [edx+(nf40_12-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_13-nf40_11)], bl
mov [edx+(nf40_14-nf40_11)], bh
mov al, [esi+3]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_21-nf40_11)], bl
mov [edx+(nf40_22-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_23-nf40_11)], bl
mov [edx+(nf40_24-nf40_11)], bh
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_31-nf40_11)], bl
mov [edx+(nf40_32-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_33-nf40_11)], bl
mov [edx+(nf40_34-nf40_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_41-nf40_11)], bl
mov [edx+(nf40_42-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_43-nf40_11)], bl
mov [edx+(nf40_44-nf40_11)], bh
add edx, nf40_51-nf40_11
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_51-nf40_51)], bl
mov [edx+(nf40_52-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_53-nf40_51)], bl
mov [edx+(nf40_54-nf40_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_61-nf40_51)], bl
mov [edx+(nf40_62-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_63-nf40_51)], bl
mov [edx+(nf40_64-nf40_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_71-nf40_51)], bl
mov [edx+(nf40_72-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_73-nf40_51)], bl
mov [edx+(nf40_74-nf40_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_81-nf40_51)], bl
mov [edx+(nf40_82-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_83-nf40_51)], bl
mov [edx+(nf40_84-nf40_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf40_0 ; flush prefetch
ALIGN 4
nf40_0:
nf40_11:mov ax, bx
shl eax, 16
nf40_12:mov ax, bx
mov [edi], eax
nf40_13:mov ax, bx
shl eax, 16
nf40_14:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_21:mov ax, bx
shl eax, 16
nf40_22:mov ax, bx
mov [edi], eax
nf40_23:mov ax, bx
shl eax, 16
nf40_24:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_31:mov ax, bx
shl eax, 16
nf40_32:mov ax, bx
mov [edi], eax
nf40_33:mov ax, bx
shl eax, 16
nf40_34:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_41:mov ax, bx
shl eax, 16
nf40_42:mov ax, bx
mov [edi], eax
nf40_43:mov ax, bx
shl eax, 16
nf40_44:mov ax, bx
mov [edi+4], eax
add edi, esi
mov eax, [esp]
mov cx, [eax+6]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf40_51:mov ax, bx
shl eax, 16
nf40_52:mov ax, bx
mov [edi], eax
nf40_53:mov ax, bx
shl eax, 16
nf40_54:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_61:mov ax, bx
shl eax, 16
nf40_62:mov ax, bx
mov [edi], eax
nf40_63:mov ax, bx
shl eax, 16
nf40_64:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_71:mov ax, bx
shl eax, 16
nf40_72:mov ax, bx
mov [edi], eax
nf40_73:mov ax, bx
shl eax, 16
nf40_74:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_81:mov ax, bx
shl eax, 16
nf40_82:mov ax, bx
mov [edi], eax
nf40_83:mov ax, bx
shl eax, 16
nf40_84:mov ax, bx
mov [edi+4], eax
pop esi
pop ebp
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
nf9: ; 8x8x2 (20 bytes)
mov eax, [esi]
cmp al, ah
ja nf41
shr eax, 16
cmp al, ah
ja nf25
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf9_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_11-nf9_11)], bl
mov [edx+(nf9_12-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_13-nf9_11)], bl
mov [edx+(nf9_14-nf9_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_15-nf9_11)], bl
mov [edx+(nf9_16-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_17-nf9_11)], bl
mov [edx+(nf9_18-nf9_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_21-nf9_11)], bl
mov [edx+(nf9_22-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_23-nf9_11)], bl
mov [edx+(nf9_24-nf9_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_25-nf9_11)], bl
mov [edx+(nf9_26-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_27-nf9_11)], bl
mov [edx+(nf9_28-nf9_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_31-nf9_11)], bl
mov [edx+(nf9_32-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_33-nf9_11)], bl
mov [edx+(nf9_34-nf9_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_35-nf9_11)], bl
mov [edx+(nf9_36-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_37-nf9_11)], bl
mov [edx+(nf9_38-nf9_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_41-nf9_11)], bl
mov [edx+(nf9_42-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_43-nf9_11)], bl
mov [edx+(nf9_44-nf9_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_45-nf9_11)], bl
mov [edx+(nf9_46-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_47-nf9_11)], bl
mov [edx+(nf9_48-nf9_11)], bh
lea edx, [edx+(nf9_51-nf9_11)]
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_51-nf9_51)], bl
mov [edx+(nf9_52-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_53-nf9_51)], bl
mov [edx+(nf9_54-nf9_51)], bh
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_55-nf9_51)], bl
mov [edx+(nf9_56-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_57-nf9_51)], bl
mov [edx+(nf9_58-nf9_51)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_61-nf9_51)], bl
mov [edx+(nf9_62-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_63-nf9_51)], bl
mov [edx+(nf9_64-nf9_51)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_65-nf9_51)], bl
mov [edx+(nf9_66-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_67-nf9_51)], bl
mov [edx+(nf9_68-nf9_51)], bh
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_71-nf9_51)], bl
mov [edx+(nf9_72-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_73-nf9_51)], bl
mov [edx+(nf9_74-nf9_51)], bh
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_75-nf9_51)], bl
mov [edx+(nf9_76-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_77-nf9_51)], bl
mov [edx+(nf9_78-nf9_51)], bh
mov al, [esi+18]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_81-nf9_51)], bl
mov [edx+(nf9_82-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_83-nf9_51)], bl
mov [edx+(nf9_84-nf9_51)], bh
mov al, [esi+19]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_85-nf9_51)], bl
mov [edx+(nf9_86-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_87-nf9_51)], bl
mov [edx+(nf9_88-nf9_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf9_0 ; flush prefetch
ALIGN 4
nf9_0:
nf9_11: mov al, bl
nf9_12: mov ah, bl
shl eax, 16
nf9_13: mov al, bl
nf9_14: mov ah, bl
mov [edi], eax
nf9_15: mov al, bl
nf9_16: mov ah, bl
shl eax, 16
nf9_17: mov al, bl
nf9_18: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_21: mov al, bl
nf9_22: mov ah, bl
shl eax, 16
nf9_23: mov al, bl
nf9_24: mov ah, bl
mov [edi], eax
nf9_25: mov al, bl
nf9_26: mov ah, bl
shl eax, 16
nf9_27: mov al, bl
nf9_28: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_31: mov al, bl
nf9_32: mov ah, bl
shl eax, 16
nf9_33: mov al, bl
nf9_34: mov ah, bl
mov [edi], eax
nf9_35: mov al, bl
nf9_36: mov ah, bl
shl eax, 16
nf9_37: mov al, bl
nf9_38: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_41: mov al, bl
nf9_42: mov ah, bl
shl eax, 16
nf9_43: mov al, bl
nf9_44: mov ah, bl
mov [edi], eax
nf9_45: mov al, bl
nf9_46: mov ah, bl
shl eax, 16
nf9_47: mov al, bl
nf9_48: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_51: mov al, bl
nf9_52: mov ah, bl
shl eax, 16
nf9_53: mov al, bl
nf9_54: mov ah, bl
mov [edi], eax
nf9_55: mov al, bl
nf9_56: mov ah, bl
shl eax, 16
nf9_57: mov al, bl
nf9_58: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_61: mov al, bl
nf9_62: mov ah, bl
shl eax, 16
nf9_63: mov al, bl
nf9_64: mov ah, bl
mov [edi], eax
nf9_65: mov al, bl
nf9_66: mov ah, bl
shl eax, 16
nf9_67: mov al, bl
nf9_68: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_71: mov al, bl
nf9_72: mov ah, bl
shl eax, 16
nf9_73: mov al, bl
nf9_74: mov ah, bl
mov [edi], eax
nf9_75: mov al, bl
nf9_76: mov ah, bl
shl eax, 16
nf9_77: mov al, bl
nf9_78: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_81: mov al, bl
nf9_82: mov ah, bl
shl eax, 16
nf9_83: mov al, bl
nf9_84: mov ah, bl
mov [edi], eax
nf9_85: mov al, bl
nf9_86: mov ah, bl
shl eax, 16
nf9_87: mov al, bl
nf9_88: mov ah, bl
mov [edi+4], eax
add esi, 20
sub edi, nfpk_back_right
retn
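; nf9 data layout (20 bytes): four colors followed by sixteen pattern bytes,
; two bits per pixel (four pixels per byte); the selections are likewise
; pre-patched into the "mov al/ah,<reg8>" instructions above before the block
; is written.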
;----------------------------------------
ALIGN 4
;nf9+16
nf25: ; low 4x4x2 (8 bytes)
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 8
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf25_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_14-nf25_11)], bl
mov [edx+(nf25_13-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_12-nf25_11)], bl
mov [edx+(nf25_11-nf25_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_24-nf25_11)], bl
mov [edx+(nf25_23-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_22-nf25_11)], bl
mov [edx+(nf25_21-nf25_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_34-nf25_11)], bl
mov [edx+(nf25_33-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_32-nf25_11)], bl
mov [edx+(nf25_31-nf25_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_44-nf25_11)], bl
mov [edx+(nf25_43-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_42-nf25_11)], bl
mov [edx+(nf25_41-nf25_11)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf25_0 ; flush prefetch
ALIGN 4
nf25_0:
nf25_11:mov ah, bl
mov al, ah
shl eax, 16
nf25_12:mov al, bl
mov ah, al
mov [edi], eax
mov [edi+edx], eax
nf25_13:mov ah, bl
mov al, ah
shl eax, 16
nf25_14:mov al, bl
mov ah, al
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
nf25_21:mov ah, bl
mov al, ah
shl eax, 16
nf25_22:mov al, bl
mov ah, al
mov [edi], eax
mov [edi+edx], eax
nf25_23:mov ah, bl
mov al, ah
shl eax, 16
nf25_24:mov al, bl
mov ah, al
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
nf25_31:mov ah, bl
mov al, ah
shl eax, 16
nf25_32:mov al, bl
mov ah, al
mov [edi], eax
mov [edi+edx], eax
nf25_33:mov ah, bl
mov al, ah
shl eax, 16
nf25_34:mov al, bl
mov ah, al
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
nf25_41:mov ah, bl
mov al, ah
shl eax, 16
nf25_42:mov al, bl
mov ah, al
mov [edi], eax
mov [edi+edx], eax
nf25_43:mov ah, bl
mov al, ah
shl eax, 16
nf25_44:mov al, bl
mov ah, al
mov [edi+4], eax
mov [edi+edx+4], eax
add edi, edx
add esi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+32
nf41: ; low 4x8x2 (12 bytes)
shr eax, 16
cmp al, ah
ja nf57
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf41_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_14-nf41_11)], bl
mov [edx+(nf41_13-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_12-nf41_11)], bl
mov [edx+(nf41_11-nf41_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_24-nf41_11)], bl
mov [edx+(nf41_23-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_22-nf41_11)], bl
mov [edx+(nf41_21-nf41_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_34-nf41_11)], bl
mov [edx+(nf41_33-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_32-nf41_11)], bl
mov [edx+(nf41_31-nf41_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_44-nf41_11)], bl
mov [edx+(nf41_43-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_42-nf41_11)], bl
mov [edx+(nf41_41-nf41_11)], bh
lea edx, [edx+(nf41_51-nf41_11)]
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_54-nf41_51)], bl
mov [edx+(nf41_53-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_52-nf41_51)], bl
mov [edx+(nf41_51-nf41_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_64-nf41_51)], bl
mov [edx+(nf41_63-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_62-nf41_51)], bl
mov [edx+(nf41_61-nf41_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_74-nf41_51)], bl
mov [edx+(nf41_73-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_72-nf41_51)], bl
mov [edx+(nf41_71-nf41_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_84-nf41_51)], bl
mov [edx+(nf41_83-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_82-nf41_51)], bl
mov [edx+(nf41_81-nf41_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf41_0 ; flush prefetch
ALIGN 4
nf41_0:
nf41_11:mov ah, bl
mov al, ah
shl eax, 16
nf41_12:mov al, bl
mov ah, al
mov [edi], eax
nf41_13:mov ah, bl
mov al, ah
shl eax, 16
nf41_14:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_21:mov ah, bl
mov al, ah
shl eax, 16
nf41_22:mov al, bl
mov ah, al
mov [edi], eax
nf41_23:mov ah, bl
mov al, ah
shl eax, 16
nf41_24:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_31:mov ah, bl
mov al, ah
shl eax, 16
nf41_32:mov al, bl
mov ah, al
mov [edi], eax
nf41_33:mov ah, bl
mov al, ah
shl eax, 16
nf41_34:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_41:mov ah, bl
mov al, ah
shl eax, 16
nf41_42:mov al, bl
mov ah, al
mov [edi], eax
nf41_43:mov ah, bl
mov al, ah
shl eax, 16
nf41_44:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_51:mov ah, bl
mov al, ah
shl eax, 16
nf41_52:mov al, bl
mov ah, al
mov [edi], eax
nf41_53:mov ah, bl
mov al, ah
shl eax, 16
nf41_54:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_61:mov ah, bl
mov al, ah
shl eax, 16
nf41_62:mov al, bl
mov ah, al
mov [edi], eax
nf41_63:mov ah, bl
mov al, ah
shl eax, 16
nf41_64:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_71:mov ah, bl
mov al, ah
shl eax, 16
nf41_72:mov al, bl
mov ah, al
mov [edi], eax
nf41_73:mov ah, bl
mov al, ah
shl eax, 16
nf41_74:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_81:mov ah, bl
mov al, ah
shl eax, 16
nf41_82:mov al, bl
mov ah, al
mov [edi], eax
nf41_83:mov ah, bl
mov al, ah
shl eax, 16
nf41_84:mov al, bl
mov ah, al
mov [edi+4], eax
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+48
nf57: ; low 8x4x2 (12 bytes)
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf57_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_11-nf57_11)], bl
mov [edx+(nf57_12-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_13-nf57_11)], bl
mov [edx+(nf57_14-nf57_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_15-nf57_11)], bl
mov [edx+(nf57_16-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_17-nf57_11)], bl
mov [edx+(nf57_18-nf57_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_21-nf57_11)], bl
mov [edx+(nf57_22-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_23-nf57_11)], bl
mov [edx+(nf57_24-nf57_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_25-nf57_11)], bl
mov [edx+(nf57_26-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_27-nf57_11)], bl
mov [edx+(nf57_28-nf57_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_31-nf57_11)], bl
mov [edx+(nf57_32-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_33-nf57_11)], bl
mov [edx+(nf57_34-nf57_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_35-nf57_11)], bl
mov [edx+(nf57_36-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_37-nf57_11)], bl
mov [edx+(nf57_38-nf57_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_41-nf57_11)], bl
mov [edx+(nf57_42-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_43-nf57_11)], bl
mov [edx+(nf57_44-nf57_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_45-nf57_11)], bl
mov [edx+(nf57_46-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_47-nf57_11)], bl
mov [edx+(nf57_48-nf57_11)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf57_0 ; flush prefetch
ALIGN 4
nf57_0:
nf57_11:mov al, bl
nf57_12:mov ah, bl
shl eax, 16
nf57_13:mov al, bl
nf57_14:mov ah, bl
mov [edi], eax
mov [edi+edx], eax
nf57_15:mov al, bl
nf57_16:mov ah, bl
shl eax, 16
nf57_17:mov al, bl
nf57_18:mov ah, bl
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
nf57_21:mov al, bl
nf57_22:mov ah, bl
shl eax, 16
nf57_23:mov al, bl
nf57_24:mov ah, bl
mov [edi], eax
mov [edi+edx], eax
nf57_25:mov al, bl
nf57_26:mov ah, bl
shl eax, 16
nf57_27:mov al, bl
nf57_28:mov ah, bl
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
nf57_31:mov al, bl
nf57_32:mov ah, bl
shl eax, 16
nf57_33:mov al, bl
nf57_34:mov ah, bl
mov [edi], eax
mov [edi+edx], eax
nf57_35:mov al, bl
nf57_36:mov ah, bl
shl eax, 16
nf57_37:mov al, bl
nf57_38:mov ah, bl
mov [edi+4], eax
mov [edi+edx+4], eax
lea edi, [edi+edx*2]
nf57_41:mov al, bl
nf57_42:mov ah, bl
shl eax, 16
nf57_43:mov al, bl
nf57_44:mov ah, bl
mov [edi], eax
mov [edi+edx], eax
nf57_45:mov al, bl
nf57_46:mov ah, bl
shl eax, 16
nf57_47:mov al, bl
nf57_48:mov ah, bl
mov [edi+4], eax
mov [edi+edx+4], eax
add edi, edx
add esi, 12
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
nf10: ; 2x2 4x4x2 (32 bytes)
mov ax, [esi]
cmp al, ah
ja nf26
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf10_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_11-nf10_11)], bl
mov [edx+(nf10_12-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_13-nf10_11)], bl
mov [edx+(nf10_14-nf10_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_15-nf10_11)], bl
mov [edx+(nf10_16-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_17-nf10_11)], bl
mov [edx+(nf10_18-nf10_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_21-nf10_11)], bl
mov [edx+(nf10_22-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_23-nf10_11)], bl
mov [edx+(nf10_24-nf10_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_25-nf10_11)], bl
mov [edx+(nf10_26-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_27-nf10_11)], bl
mov [edx+(nf10_28-nf10_11)], bh
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_31-nf10_11)], bl
mov [edx+(nf10_32-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_33-nf10_11)], bl
mov [edx+(nf10_34-nf10_11)], bh
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_35-nf10_11)], bl
mov [edx+(nf10_36-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_37-nf10_11)], bl
mov [edx+(nf10_38-nf10_11)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_41-nf10_11)], bl
mov [edx+(nf10_42-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_43-nf10_11)], bl
mov [edx+(nf10_44-nf10_11)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_45-nf10_11)], bl
mov [edx+(nf10_46-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_47-nf10_11)], bl
mov [edx+(nf10_48-nf10_11)], bh
lea edx, [edx+(nf10_51-nf10_11)]
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_51-nf10_51)], bl
mov [edx+(nf10_52-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_53-nf10_51)], bl
mov [edx+(nf10_54-nf10_51)], bh
mov al, [esi+21]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_55-nf10_51)], bl
mov [edx+(nf10_56-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_57-nf10_51)], bl
mov [edx+(nf10_58-nf10_51)], bh
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_61-nf10_51)], bl
mov [edx+(nf10_62-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_63-nf10_51)], bl
mov [edx+(nf10_64-nf10_51)], bh
mov al, [esi+23]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_65-nf10_51)], bl
mov [edx+(nf10_66-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_67-nf10_51)], bl
mov [edx+(nf10_68-nf10_51)], bh
mov al, [esi+28]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_71-nf10_51)], bl
mov [edx+(nf10_72-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_73-nf10_51)], bl
mov [edx+(nf10_74-nf10_51)], bh
mov al, [esi+29]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_75-nf10_51)], bl
mov [edx+(nf10_76-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_77-nf10_51)], bl
mov [edx+(nf10_78-nf10_51)], bh
mov al, [esi+30]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_81-nf10_51)], bl
mov [edx+(nf10_82-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_83-nf10_51)], bl
mov [edx+(nf10_84-nf10_51)], bh
mov al, [esi+31]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_85-nf10_51)], bl
mov [edx+(nf10_86-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_87-nf10_51)], bl
mov [edx+(nf10_88-nf10_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf10_0 ; flush prefetch
ALIGN 4
nf10_0:
nf10_11:mov al, bl
nf10_12:mov ah, bl
shl eax, 16
nf10_13:mov al, bl
nf10_14:mov ah, bl
mov [edi], eax
add edi, edx
nf10_15:mov al, bl
nf10_16:mov ah, bl
shl eax, 16
nf10_17:mov al, bl
nf10_18:mov ah, bl
mov [edi], eax
add edi, edx
nf10_21:mov al, bl
nf10_22:mov ah, bl
shl eax, 16
nf10_23:mov al, bl
nf10_24:mov ah, bl
mov [edi], eax
add edi, edx
nf10_25:mov al, bl
nf10_26:mov ah, bl
shl eax, 16
nf10_27:mov al, bl
nf10_28:mov ah, bl
mov [edi], eax
add edi, edx
; Load bl,bh,cl,ch with four colors
mov bx, [esi+8]
mov cx, [esi+10]
nf10_31:mov al, bl
nf10_32:mov ah, bl
shl eax, 16
nf10_33:mov al, bl
nf10_34:mov ah, bl
mov [edi], eax
add edi, edx
nf10_35:mov al, bl
nf10_36:mov ah, bl
shl eax, 16
nf10_37:mov al, bl
nf10_38:mov ah, bl
mov [edi], eax
add edi, edx
nf10_41:mov al, bl
nf10_42:mov ah, bl
shl eax, 16
nf10_43:mov al, bl
nf10_44:mov ah, bl
mov [edi], eax
add edi, edx
nf10_45:mov al, bl
nf10_46:mov ah, bl
shl eax, 16
nf10_47:mov al, bl
nf10_48:mov ah, bl
mov [edi], eax
add edi, edx
lea eax, [edx*8-4]
sub edi, eax
; Load bl,bh,cl,ch with four colors
mov bx, [esi+16]
mov cx, [esi+18]
nf10_51:mov al, bl
nf10_52:mov ah, bl
shl eax, 16
nf10_53:mov al, bl
nf10_54:mov ah, bl
mov [edi], eax
add edi, edx
nf10_55:mov al, bl
nf10_56:mov ah, bl
shl eax, 16
nf10_57:mov al, bl
nf10_58:mov ah, bl
mov [edi], eax
add edi, edx
nf10_61:mov al, bl
nf10_62:mov ah, bl
shl eax, 16
nf10_63:mov al, bl
nf10_64:mov ah, bl
mov [edi], eax
add edi, edx
nf10_65:mov al, bl
nf10_66:mov ah, bl
shl eax, 16
nf10_67:mov al, bl
nf10_68:mov ah, bl
mov [edi], eax
add edi, edx
; Load bl,bh,cl,ch with four colors
mov bx, [esi+24]
mov cx, [esi+26]
nf10_71:mov al, bl
nf10_72:mov ah, bl
shl eax, 16
nf10_73:mov al, bl
nf10_74:mov ah, bl
mov [edi], eax
add edi, edx
nf10_75:mov al, bl
nf10_76:mov ah, bl
shl eax, 16
nf10_77:mov al, bl
nf10_78:mov ah, bl
mov [edi], eax
add edi, edx
nf10_81:mov al, bl
nf10_82:mov ah, bl
shl eax, 16
nf10_83:mov al, bl
nf10_84:mov ah, bl
mov [edi], eax
add edi, edx
nf10_85:mov al, bl
nf10_86:mov ah, bl
shl eax, 16
nf10_87:mov al, bl
nf10_88:mov ah, bl
mov [edi], eax
add esi, 32
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+16
nf26: ; 2x1 4x8x2 (24 bytes)
mov ax, [esi+12]
cmp al, ah
ja nf42
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 24
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf26_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_11-nf26_11)], bl
mov [edx+(nf26_12-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_13-nf26_11)], bl
mov [edx+(nf26_14-nf26_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_15-nf26_11)], bl
mov [edx+(nf26_16-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_17-nf26_11)], bl
mov [edx+(nf26_18-nf26_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_21-nf26_11)], bl
mov [edx+(nf26_22-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_23-nf26_11)], bl
mov [edx+(nf26_24-nf26_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_25-nf26_11)], bl
mov [edx+(nf26_26-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_27-nf26_11)], bl
mov [edx+(nf26_28-nf26_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_31-nf26_11)], bl
mov [edx+(nf26_32-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_33-nf26_11)], bl
mov [edx+(nf26_34-nf26_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_35-nf26_11)], bl
mov [edx+(nf26_36-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_37-nf26_11)], bl
mov [edx+(nf26_38-nf26_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_41-nf26_11)], bl
mov [edx+(nf26_42-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_43-nf26_11)], bl
mov [edx+(nf26_44-nf26_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_45-nf26_11)], bl
mov [edx+(nf26_46-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_47-nf26_11)], bl
mov [edx+(nf26_48-nf26_11)], bh
lea edx, [edx+(nf26_51-nf26_11)]
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_51-nf26_51)], bl
mov [edx+(nf26_52-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_53-nf26_51)], bl
mov [edx+(nf26_54-nf26_51)], bh
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_55-nf26_51)], bl
mov [edx+(nf26_56-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_57-nf26_51)], bl
mov [edx+(nf26_58-nf26_51)], bh
mov al, [esi+18]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_61-nf26_51)], bl
mov [edx+(nf26_62-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_63-nf26_51)], bl
mov [edx+(nf26_64-nf26_51)], bh
mov al, [esi+19]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_65-nf26_51)], bl
mov [edx+(nf26_66-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_67-nf26_51)], bl
mov [edx+(nf26_68-nf26_51)], bh
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_71-nf26_51)], bl
mov [edx+(nf26_72-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_73-nf26_51)], bl
mov [edx+(nf26_74-nf26_51)], bh
mov al, [esi+21]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_75-nf26_51)], bl
mov [edx+(nf26_76-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_77-nf26_51)], bl
mov [edx+(nf26_78-nf26_51)], bh
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_81-nf26_51)], bl
mov [edx+(nf26_82-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_83-nf26_51)], bl
mov [edx+(nf26_84-nf26_51)], bh
mov al, [esi+23]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_85-nf26_51)], bl
mov [edx+(nf26_86-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_87-nf26_51)], bl
mov [edx+(nf26_88-nf26_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf26_0 ; flush prefetch
ALIGN 4
nf26_0:
nf26_11:mov al, bl
nf26_12:mov ah, bl
shl eax, 16
nf26_13:mov al, bl
nf26_14:mov ah, bl
mov [edi], eax
add edi, edx
nf26_15:mov al, bl
nf26_16:mov ah, bl
shl eax, 16
nf26_17:mov al, bl
nf26_18:mov ah, bl
mov [edi], eax
add edi, edx
nf26_21:mov al, bl
nf26_22:mov ah, bl
shl eax, 16
nf26_23:mov al, bl
nf26_24:mov ah, bl
mov [edi], eax
add edi, edx
nf26_25:mov al, bl
nf26_26:mov ah, bl
shl eax, 16
nf26_27:mov al, bl
nf26_28:mov ah, bl
mov [edi], eax
add edi, edx
nf26_31:mov al, bl
nf26_32:mov ah, bl
shl eax, 16
nf26_33:mov al, bl
nf26_34:mov ah, bl
mov [edi], eax
add edi, edx
nf26_35:mov al, bl
nf26_36:mov ah, bl
shl eax, 16
nf26_37:mov al, bl
nf26_38:mov ah, bl
mov [edi], eax
add edi, edx
nf26_41:mov al, bl
nf26_42:mov ah, bl
shl eax, 16
nf26_43:mov al, bl
nf26_44:mov ah, bl
mov [edi], eax
add edi, edx
nf26_45:mov al, bl
nf26_46:mov ah, bl
shl eax, 16
nf26_47:mov al, bl
nf26_48:mov ah, bl
mov [edi], eax
add edi, edx
lea eax, [edx*8-4]
sub edi, eax
; Load bl,bh,cl,ch with four colors
mov bx, [esi+12]
mov cx, [esi+14]
nf26_51:mov al, bl
nf26_52:mov ah, bl
shl eax, 16
nf26_53:mov al, bl
nf26_54:mov ah, bl
mov [edi], eax
add edi, edx
nf26_55:mov al, bl
nf26_56:mov ah, bl
shl eax, 16
nf26_57:mov al, bl
nf26_58:mov ah, bl
mov [edi], eax
add edi, edx
nf26_61:mov al, bl
nf26_62:mov ah, bl
shl eax, 16
nf26_63:mov al, bl
nf26_64:mov ah, bl
mov [edi], eax
add edi, edx
nf26_65:mov al, bl
nf26_66:mov ah, bl
shl eax, 16
nf26_67:mov al, bl
nf26_68:mov ah, bl
mov [edi], eax
add edi, edx
nf26_71:mov al, bl
nf26_72:mov ah, bl
shl eax, 16
nf26_73:mov al, bl
nf26_74:mov ah, bl
mov [edi], eax
add edi, edx
nf26_75:mov al, bl
nf26_76:mov ah, bl
shl eax, 16
nf26_77:mov al, bl
nf26_78:mov ah, bl
mov [edi], eax
add edi, edx
nf26_81:mov al, bl
nf26_82:mov ah, bl
shl eax, 16
nf26_83:mov al, bl
nf26_84:mov ah, bl
mov [edi], eax
add edi, edx
nf26_85:mov al, bl
nf26_86:mov ah, bl
shl eax, 16
nf26_87:mov al, bl
nf26_88:mov ah, bl
mov [edi], eax
add esi, 24
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+32
nf42: ; 1x2 8x4x2 (24 bytes)
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 24
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf42_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_11-nf42_11)], bl
mov [edx+(nf42_12-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_13-nf42_11)], bl
mov [edx+(nf42_14-nf42_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_15-nf42_11)], bl
mov [edx+(nf42_16-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_17-nf42_11)], bl
mov [edx+(nf42_18-nf42_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_21-nf42_11)], bl
mov [edx+(nf42_22-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_23-nf42_11)], bl
mov [edx+(nf42_24-nf42_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_25-nf42_11)], bl
mov [edx+(nf42_26-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_27-nf42_11)], bl
mov [edx+(nf42_28-nf42_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_31-nf42_11)], bl
mov [edx+(nf42_32-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_33-nf42_11)], bl
mov [edx+(nf42_34-nf42_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_35-nf42_11)], bl
mov [edx+(nf42_36-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_37-nf42_11)], bl
mov [edx+(nf42_38-nf42_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_41-nf42_11)], bl
mov [edx+(nf42_42-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_43-nf42_11)], bl
mov [edx+(nf42_44-nf42_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_45-nf42_11)], bl
mov [edx+(nf42_46-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_47-nf42_11)], bl
mov [edx+(nf42_48-nf42_11)], bh
lea edx, [edx+(nf42_51-nf42_11)]
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_51-nf42_51)], bl
mov [edx+(nf42_52-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_53-nf42_51)], bl
mov [edx+(nf42_54-nf42_51)], bh
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_55-nf42_51)], bl
mov [edx+(nf42_56-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_57-nf42_51)], bl
mov [edx+(nf42_58-nf42_51)], bh
mov al, [esi+18]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_61-nf42_51)], bl
mov [edx+(nf42_62-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_63-nf42_51)], bl
mov [edx+(nf42_64-nf42_51)], bh
mov al, [esi+19]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_65-nf42_51)], bl
mov [edx+(nf42_66-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_67-nf42_51)], bl
mov [edx+(nf42_68-nf42_51)], bh
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_71-nf42_51)], bl
mov [edx+(nf42_72-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_73-nf42_51)], bl
mov [edx+(nf42_74-nf42_51)], bh
mov al, [esi+21]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_75-nf42_51)], bl
mov [edx+(nf42_76-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_77-nf42_51)], bl
mov [edx+(nf42_78-nf42_51)], bh
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_81-nf42_51)], bl
mov [edx+(nf42_82-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_83-nf42_51)], bl
mov [edx+(nf42_84-nf42_51)], bh
mov al, [esi+23]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_85-nf42_51)], bl
mov [edx+(nf42_86-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_87-nf42_51)], bl
mov [edx+(nf42_88-nf42_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf42_0 ; flush prefetch
ALIGN 4
nf42_0:
nf42_11:mov al, bl
nf42_12:mov ah, bl
shl eax, 16
nf42_13:mov al, bl
nf42_14:mov ah, bl
mov [edi], eax
nf42_15:mov al, bl
nf42_16:mov ah, bl
shl eax, 16
nf42_17:mov al, bl
nf42_18:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_21:mov al, bl
nf42_22:mov ah, bl
shl eax, 16
nf42_23:mov al, bl
nf42_24:mov ah, bl
mov [edi], eax
nf42_25:mov al, bl
nf42_26:mov ah, bl
shl eax, 16
nf42_27:mov al, bl
nf42_28:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_31:mov al, bl
nf42_32:mov ah, bl
shl eax, 16
nf42_33:mov al, bl
nf42_34:mov ah, bl
mov [edi], eax
nf42_35:mov al, bl
nf42_36:mov ah, bl
shl eax, 16
nf42_37:mov al, bl
nf42_38:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_41:mov al, bl
nf42_42:mov ah, bl
shl eax, 16
nf42_43:mov al, bl
nf42_44:mov ah, bl
mov [edi], eax
nf42_45:mov al, bl
nf42_46:mov ah, bl
shl eax, 16
nf42_47:mov al, bl
nf42_48:mov ah, bl
mov [edi+4], eax
add edi, edx
; Load bl,bh,cl,ch with four colors
mov bx, [esi+12]
mov cx, [esi+14]
nf42_51:mov al, bl
nf42_52:mov ah, bl
shl eax, 16
nf42_53:mov al, bl
nf42_54:mov ah, bl
mov [edi], eax
nf42_55:mov al, bl
nf42_56:mov ah, bl
shl eax, 16
nf42_57:mov al, bl
nf42_58:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_61:mov al, bl
nf42_62:mov ah, bl
shl eax, 16
nf42_63:mov al, bl
nf42_64:mov ah, bl
mov [edi], eax
nf42_65:mov al, bl
nf42_66:mov ah, bl
shl eax, 16
nf42_67:mov al, bl
nf42_68:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_71:mov al, bl
nf42_72:mov ah, bl
shl eax, 16
nf42_73:mov al, bl
nf42_74:mov ah, bl
mov [edi], eax
nf42_75:mov al, bl
nf42_76:mov ah, bl
shl eax, 16
nf42_77:mov al, bl
nf42_78:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_81:mov al, bl
nf42_82:mov ah, bl
shl eax, 16
nf42_83:mov al, bl
nf42_84:mov ah, bl
mov [edi], eax
nf42_85:mov al, bl
nf42_86:mov ah, bl
shl eax, 16
nf42_87:mov al, bl
nf42_88:mov ah, bl
mov [edi+4], eax
add esi, 24
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
nf11: ; 8x8x8 (64 bytes)
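; 64 literal pixels follow in the stream; copy them straight to the
; destination, 8 bytes per row.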
if 0 ;debug
add esi, 64
mov eax, 0fefefefeH
; mov ebx, eax
mov ebx, 0
jmp nf_solid
endif
mov edx, nf_width
mov eax, [esi] ;0
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
add edi, edx
mov eax, [esi+8] ;1
mov [edi], eax
mov eax, [esi+12]
mov [edi+4], eax
add edi, edx
mov eax, [esi+16] ;2
mov [edi], eax
mov eax, [esi+20]
mov [edi+4], eax
add edi, edx
mov eax, [esi+24] ;3
mov [edi], eax
mov eax, [esi+28]
mov [edi+4], eax
add edi, edx
mov eax, [esi+32] ;4
mov [edi], eax
mov eax, [esi+36]
mov [edi+4], eax
add edi, edx
mov eax, [esi+40] ;5
mov [edi], eax
mov eax, [esi+44]
mov [edi+4], eax
add edi, edx
mov eax, [esi+48] ;6
mov [edi], eax
mov eax, [esi+52]
mov [edi+4], eax
add edi, edx
mov eax, [esi+56] ;7
mov [edi], eax
mov eax, [esi+60]
mov [edi+4], eax
add esi, 64
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
nf12: ; low 4x4x8 (16 bytes)
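; 16 literal bytes, one per 2x2 pixel block: each byte is replicated into
; two adjacent pixels and written to two adjacent rows.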
mov edx, nf_width
mov eax, [esi]
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi], ebx
mov [edi+edx], ebx
shr eax, 16
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi+4], ebx
mov [edi+edx+4], ebx
lea edi, [edi+edx*2]
mov eax, [esi+4]
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi], ebx
mov [edi+edx], ebx
shr eax, 16
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi+4], ebx
mov [edi+edx+4], ebx
lea edi, [edi+edx*2]
mov eax, [esi+8]
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi], ebx
mov [edi+edx], ebx
shr eax, 16
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi+4], ebx
mov [edi+edx+4], ebx
lea edi, [edi+edx*2]
mov eax, [esi+12]
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi], ebx
mov [edi+edx], ebx
shr eax, 16
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi+4], ebx
mov [edi+edx+4], ebx
add edi, edx
sub edi, nfpk_back_right
add esi, 16
retn
;----------------------------------------
ALIGN 4
nf13: ; 2x2 4x4x0 (4 bytes)
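; Four literal bytes, one per 4x4 quadrant: [esi] fills the top-left
; quarter, [esi+1] the top-right, [esi+2] the bottom-left and [esi+3] the
; bottom-right.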
mov edx, nf_width
mov cl, [esi]
mov ch, cl
mov eax, ecx
shl eax, 16
mov ax, cx
mov cl, [esi+1]
mov ch, cl
mov ebx, ecx
shl ebx, 16
mov bx, cx
mov [edi], eax
mov [edi+4], ebx
mov [edi+edx], eax
mov [edi+edx+4], ebx
lea edi, [edi+edx*2]
mov [edi], eax
mov [edi+4], ebx
mov [edi+edx], eax
mov [edi+edx+4], ebx
lea edi, [edi+edx*2]
mov cl, [esi+2]
mov ch, cl
mov eax, ecx
shl eax, 16
mov ax, cx
mov cl, [esi+3]
mov ch, cl
mov ebx, ecx
shl ebx, 16
mov bx, cx
mov [edi], eax
mov [edi+4], ebx
mov [edi+edx], eax
mov [edi+edx+4], ebx
lea edi, [edi+edx*2]
mov [edi], eax
mov [edi+4], ebx
add edi, edx
mov [edi], eax
mov [edi+4], ebx
sub edi, nfpk_back_right
add esi, 4
retn
;----------------------------------------
ALIGN 4
nf14: ; 8x8x0 (1 byte)
if 0 ;debug
jmp nf0
endif
mov bl, [esi] ; Copy color into 8 positions
inc esi
mov bh, bl
mov eax, ebx
shl eax, 16
mov ax, bx
mov ebx, eax
if 0 ;debug
mov eax, 080808080h
mov ebx, eax
endif
jmp nf_solid
retn
;----------------------------------------
ALIGN 4
nf15: ; mix 8x8x0 (2 bytes)
if 0 ;debug
inc esi
jmp nf0
endif
mov bx, [esi] ; Copy 2 colors into 8 positions
add esi, 2 ; in a checkerboard
mov ax, bx
shl eax, 16
mov ax, bx
mov ebx, eax
rol ebx, 8
if 0 ;debug
mov eax, 080808080h
mov ebx, eax
endif
nf_solid:
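; Fill the square with eax on even rows and ebx on odd rows.  For nf14 the
; two registers hold the same replicated color; for nf15 ebx is eax rotated
; by one byte, which staggers the two colors into a checkerboard.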
mov edx, nf_width
mov [edi], eax
mov [edi+4], eax
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
add edi, edx
mov [edi], eax
mov [edi+4], eax
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
add edi, edx
mov [edi], eax
mov [edi+4], eax
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
add edi, edx
mov [edi], eax
mov [edi+4], eax
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
nfPkDecomp ENDP
; Half vertical resolution version (skip odd lines)
;
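;nfPkDecompH(ops, comp, x, y, w, h)
;
;Half vertical resolution variant of nfPkDecomp: same opcode stream, but
;only even source lines are kept, so each 8x8 square covers 8x4 pixels of
;the half-height buffer.  Each byte of ops packs two 4-bit opcodes (the low
;nibble's handler runs first); w is the width of the updated region in
;squares and h counts the rows of squares.
;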
nfPkDecompH PROC USES ESI EDI EBX, \
ops:PTRBYTE, comp:PTRBYTE, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD
LOCAL tbuf: PTRBYTE
LOCAL new_row:DWORD
LOCAL DiffBufPtrs:DWORD
LOCAL nfpk_back_right: DWORD
LOCAL wcnt:DWORD
LOG_LABEL "StartPkDecomp"
.data
nfpk_OpTblH label dword
dword offset nf0 ; Prev Same (0)
dword offset nf1 ; No change (and copied to screen) (0)
dword offset nf2 ; Near shift from older part of current buf (1)
dword offset nf3 ; Near shift from newer part of current buf (1)
dword offset nf4 ; Near shift from previous buffer (1)
dword offset nf5 ; Far shift from previous buffer (2)
dword offset nf6 ; Far shift from current buffer (2)
; [Or if COMPOPS, run of no changes (0)]
dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 8x4x2 (24 bytes)
dword offset nf11 ; 8x8x8 (64 bytes)
dword offset nf12 ; low 4x4x8 (16 bytes)
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
dword offset nf14 ; 8x8x0 (1 byte)
dword offset nf15 ; mix 8x8x0 (2 bytes)
.code
ifdef SYMANTEC
mov ebx, ds ; Allow DS to access code
mov ecx, 0
mov ax, 3505h
int 21h
endif
NF_DECOMP_INIT 0
mov eax, nf_width
shl eax, 2
sub eax, nf_new_w
mov new_row, eax
shr nf_new_h, 1
mov eax, nf_width
lea eax, [eax*2+eax-SWIDTH]
mov nfpk_back_right, eax
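; new_row = 4*width - new_w: after the last square in a row, edi has
; advanced nf_new_w bytes (presumably the region width in bytes, set up by
; NF_DECOMP_INIT), so adding this steps it down to the start of the next
; row of squares (each square is 4 buffer lines tall here).
; nfpk_back_right = 3*width - SWIDTH: subtracted at the end of every square
; handler to move edi from the start of the square's last line back up to
; the top line of the next square to the right.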
mov esi, comp
mov edi, tbuf
nf_StartRow:
mov eax, w
shr eax, 1
mov wcnt, eax
ALIGN 4
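; Each ops byte drives two squares: push the continuation (nf_NextPair) and
; the handler for the high nibble, then jump to the low-nibble handler.
; Its closing retn chains into the high-nibble handler, whose retn returns
; to nf_NextPair for the next pair.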
nf_NextPair:
dec wcnt
js nf_NextRow
mov ebx, ops
mov al, [ebx]
inc ebx
mov ops, ebx
xor ebx, ebx
mov bl, al
shr bl, 4
and eax, 0Fh
push offset nf_NextPair
push nfpk_OpTblH[ebx*4]
jmp nfpk_OpTblH[eax*4]
nf_NextRow:
add edi, new_row
dec h
jnz nf_StartRow
LOG_LABEL "EndPkDecomp"
ifdef SYMANTEC
mov ebx, ds ; Disable DS from accessing code
mov ecx, offset DGROUP:_data_bottom[-1]
mov ax, 3505h
int 21h
endif
ret
;----------------------------------------
ALIGN 4
nf0: ; No change from previous buffer
mov eax, DiffBufPtrs
jmp nf_shift
;----------------------------------------
ALIGN 4
nf1: ; No change (and copied to screen)
add edi, SWIDTH
retn
;----------------------------------------
ALIGN 4
nf2: ; Near shift from older part of current buffer
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP2[eax*2]
nf_xyc_shift:
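; The packed shift word puts the x displacement in al and the y displacement
; in ah (both signed).  al is sign-extended to a byte offset; the
; add/adc/sar sequence halves the signed y, rounding toward zero, since this
; decoder keeps only every other line, and nfpk_ShiftY (built during init)
; is assumed to map the halved y to a row offset in bytes.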
xor ebx, ebx
mov bl, ah
shl eax, 24
sar eax, 24
add bl, 080h
adc bl, 080h
sar bl, 1
add eax, nfpk_ShiftY[ebx*4]
jmp nf_shift
;----------------------------------------
ALIGN 4
nf3: ; Near shift from newer part of current buffer
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP2[eax*2]
neg al
neg ah
jmp nf_xyc_shift
;----------------------------------------
ALIGN 4
nf4: ; Near shift from previous buffer
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP1[eax*2]
jmp nf_xyp_shift
;----------------------------------------
ALIGN 4
nf5: ; Far shift from previous buffer
mov ax, [esi]
add esi, 2
nf_xyp_shift:
xor ebx, ebx
mov bl, ah
shl eax, 24
sar eax, 24
add bl, 080h
adc bl, 080h
sar bl, 1
add eax, nfpk_ShiftY[ebx*4]
add eax, DiffBufPtrs
jmp nf_shift
;----------------------------------------
ALIGN 4
if COMPOPS
nf6: ; Run of no changes (may appear only as the first nibble of an opcode byte)
; Next nibble k specifies 2k+4 squares with no changes
add esp, 4 ; Discard the pushed second handler (next nibble is a count, not an opcode)
add ebx, 2 ; (minimum of 4 squares)
ALIGN 4
nf6a: add edi, SWIDTH*2 ; Advance over two squares
dec ebx
jz nf6z ; Last pair of squares
dec wcnt ; Same row?
jns nf6a ; Yes
add edi, new_row ; Advance to next row
dec h ; Decrement row count (should never become zero here)
mov eax, w ; Reset wcnt
shr eax, 1
dec eax
mov wcnt, eax
jmp nf6a
nf6z: retn
else
nf6: ; Far shift from current buffer
mov ax, [esi]
add esi, 2
jmp nf_xyc_shift
endif
;----------------------------------------
ALIGN 4
nf_shift:
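; eax = byte offset from edi to the source square (within this buffer, or
; the previous one when DiffBufPtrs has been added in).  Copy the square's
; 4 rows of 8 bytes and leave edi at the next square.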
if 0 ;debug
mov eax, 0
mov ebx, eax
jmp nf_solid
endif
mov ebx, esi ; save esi
lea esi, [edi+eax]
mov edx, nf_width
REPEAT 3
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
add esi, edx
add edi, edx
ENDM
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
mov esi, ebx ; restore esi
retn
;----------------------------------------
ALIGN 4
nf7: ; 8x8x1 (10 bytes)
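; 8x8x1: two color bytes followed by eight pattern bytes, one bit per pixel.
; Only the pattern bytes for even lines ([esi+2], +4, +6, +8) are used here;
; nfpk_mov8 supplies the bytes patched into the nf7_xx movs so each word
; written picks one of the four 2-pixel color pairs prepared in bx/dx/cx/bp.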
mov ax, [esi]
cmp al, ah
ja nf23
if 0 ;debug
add esi, 10
mov eax, 0fefefefeH
mov ebx, eax
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf7_11+2
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_11-nf7_11)], bl
mov [edx+(nf7_12-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_13-nf7_11)], bl
mov [edx+(nf7_14-nf7_11)], bh
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_31-nf7_11)], bl
mov [edx+(nf7_32-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_33-nf7_11)], bl
mov [edx+(nf7_34-nf7_11)], bh
lea edx, [edx+(nf7_51-nf7_11)]
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_51-nf7_51)], bl
mov [edx+(nf7_52-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_53-nf7_51)], bl
mov [edx+(nf7_54-nf7_51)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_71-nf7_51)], bl
mov [edx+(nf7_72-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_73-nf7_51)], bl
mov [edx+(nf7_74-nf7_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi,nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf7_0 ; flush prefetch
ALIGN 4
nf7_0:
nf7_11: mov ax, bx
shl eax, 16
nf7_12: mov ax, bx
mov [edi], eax
nf7_13: mov ax, bx
shl eax, 16
nf7_14: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_31: mov ax, bx
shl eax, 16
nf7_32: mov ax, bx
mov [edi], eax
nf7_33: mov ax, bx
shl eax, 16
nf7_34: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_51: mov ax, bx
shl eax, 16
nf7_52: mov ax, bx
mov [edi], eax
nf7_53: mov ax, bx
shl eax, 16
nf7_54: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_71: mov ax, bx
shl eax, 16
nf7_72: mov ax, bx
mov [edi], eax
nf7_73: mov ax, bx
shl eax, 16
nf7_74: mov ax, bx
mov [edi+4], eax
pop esi
pop ebp
add esi, 10
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf7+16
nf23: ; low 4x4x1 (4 bytes)
xor eax, eax
lea ecx, nfpk_mov4l
lea edx, byte ptr ds:nf23_11+2
mov al, [esi+2]
and al, 0fH
mov ebx, [ecx+eax*4]
mov [edx+(nf23_11-nf23_11)], bl
mov [edx+(nf23_12-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_13-nf23_11)], bl
mov [edx+(nf23_14-nf23_11)], bh
mov al, [esi+2]
shr al, 4
mov ebx, [ecx+eax*4]
mov [edx+(nf23_31-nf23_11)], bl
mov [edx+(nf23_32-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_33-nf23_11)], bl
mov [edx+(nf23_34-nf23_11)], bh
mov al, [esi+3]
and al, 0fH
mov ebx, [ecx+eax*4]
mov [edx+(nf23_51-nf23_11)], bl
mov [edx+(nf23_52-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_53-nf23_11)], bl
mov [edx+(nf23_54-nf23_11)], bh
mov al, [esi+3]
shr al, 4
mov ebx, [ecx+eax*4]
mov [edx+(nf23_71-nf23_11)], bl
mov [edx+(nf23_72-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_73-nf23_11)], bl
mov [edx+(nf23_74-nf23_11)], bh
mov edx, nf_width
; load bx,cx with 00,11 color combinations
mov bx, [esi]
mov cl, bh
mov bh, bl
mov ch, cl
jmp nf23_0 ; flush prefetch
ALIGN 4
nf23_0:
nf23_11:mov ax, bx
shl eax, 16
nf23_12:mov ax, bx
mov [edi], eax
nf23_13:mov ax, bx
shl eax, 16
nf23_14:mov ax, bx
mov [edi+4], eax
add edi, edx
nf23_31:mov ax, bx
shl eax, 16
nf23_32:mov ax, bx
mov [edi], eax
nf23_33:mov ax, bx
shl eax, 16
nf23_34:mov ax, bx
mov [edi+4], eax
add edi, edx
nf23_51:mov ax, bx
shl eax, 16
nf23_52:mov ax, bx
mov [edi], eax
nf23_53:mov ax, bx
shl eax, 16
nf23_54:mov ax, bx
mov [edi+4], eax
add edi, edx
nf23_71:mov ax, bx
shl eax, 16
nf23_72:mov ax, bx
mov [edi], eax
nf23_73:mov ax, bx
shl eax, 16
nf23_74:mov ax, bx
mov [edi+4], eax
sub edi, nfpk_back_right
add esi, 4
retn
;----------------------------------------
ALIGN 4
nf8: ; 2x2 4x4x1 (16 bytes)
mov ax, [esi]
cmp al, ah
ja nf24
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf8_11+2
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_11-nf8_11)], bl
mov [edx+(nf8_12-nf8_11)], bh
mov al, [esi+3]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_21-nf8_11)], bl
mov [edx+(nf8_22-nf8_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_31-nf8_11)], bl
mov [edx+(nf8_32-nf8_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_41-nf8_11)], bl
mov [edx+(nf8_42-nf8_11)], bh
add edx, nf8_51-nf8_11
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_51-nf8_51)], bl
mov [edx+(nf8_52-nf8_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_61-nf8_51)], bl
mov [edx+(nf8_62-nf8_51)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_71-nf8_51)], bl
mov [edx+(nf8_72-nf8_51)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_81-nf8_51)], bl
mov [edx+(nf8_82-nf8_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf8_0 ; flush prefetch
ALIGN 4
nf8_0:
nf8_11: mov ax, bx
shl eax, 16
nf8_12: mov ax, bx
mov [edi], eax
add edi, esi
nf8_21: mov ax, bx
shl eax, 16
nf8_22: mov ax, bx
mov [edi], eax
add edi, esi
mov eax, [esp]
mov cx, [eax+4]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf8_31: mov ax, bx
shl eax, 16
nf8_32: mov ax, bx
mov [edi], eax
add edi, esi
nf8_41: mov ax, bx
shl eax, 16
nf8_42: mov ax, bx
mov [edi], eax
add edi, esi
lea eax, [esi*4-4]
sub edi, eax
mov eax, [esp]
mov cx, [eax+8]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf8_51: mov ax, bx
shl eax, 16
nf8_52: mov ax, bx
mov [edi], eax
add edi, esi
nf8_61: mov ax, bx
shl eax, 16
nf8_62: mov ax, bx
mov [edi], eax
add edi, esi
mov eax, [esp]
mov cx, [eax+12]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf8_71: mov ax, bx
shl eax, 16
nf8_72: mov ax, bx
mov [edi], eax
add edi, esi
nf8_81: mov ax, bx
shl eax, 16
nf8_82: mov ax, bx
mov [edi], eax
pop esi
pop ebp
add esi, 16
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+16
nf24: ; 2x1 4x8x1 (12 bytes)
mov ax, [esi+6]
cmp al, ah
ja nf40
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf24_11+2
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_11-nf24_11)], bl
mov [edx+(nf24_12-nf24_11)], bh
mov al, [esi+3]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_21-nf24_11)], bl
mov [edx+(nf24_22-nf24_11)], bh
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_31-nf24_11)], bl
mov [edx+(nf24_32-nf24_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_41-nf24_11)], bl
mov [edx+(nf24_42-nf24_11)], bh
add edx, nf24_51-nf24_11
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_51-nf24_51)], bl
mov [edx+(nf24_52-nf24_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_61-nf24_51)], bl
mov [edx+(nf24_62-nf24_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_71-nf24_51)], bl
mov [edx+(nf24_72-nf24_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_81-nf24_51)], bl
mov [edx+(nf24_82-nf24_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf24_0 ; flush prefetch
ALIGN 4
nf24_0:
nf24_11:mov ax, bx
shl eax, 16
nf24_12:mov ax, bx
mov [edi], eax
add edi, esi
nf24_21:mov ax, bx
shl eax, 16
nf24_22:mov ax, bx
mov [edi], eax
add edi, esi
nf24_31:mov ax, bx
shl eax, 16
nf24_32:mov ax, bx
mov [edi], eax
add edi, esi
nf24_41:mov ax, bx
shl eax, 16
nf24_42:mov ax, bx
mov [edi], eax
add edi, esi
lea eax, [esi*4-4]
sub edi, eax
mov eax, [esp]
mov cx, [eax+6]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf24_51:mov ax, bx
shl eax, 16
nf24_52:mov ax, bx
mov [edi], eax
add edi, esi
nf24_61:mov ax, bx
shl eax, 16
nf24_62:mov ax, bx
mov [edi], eax
add edi, esi
nf24_71:mov ax, bx
shl eax, 16
nf24_72:mov ax, bx
mov [edi], eax
add edi, esi
nf24_81:mov ax, bx
shl eax, 16
nf24_82:mov ax, bx
mov [edi], eax
pop esi
pop ebp
add esi, 12
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+32
nf40: ; 1x2 8x4x1 (12 bytes)
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf40_11+2
mov al, [esi+2]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_11-nf40_11)], bl
mov [edx+(nf40_12-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_13-nf40_11)], bl
mov [edx+(nf40_14-nf40_11)], bh
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_31-nf40_11)], bl
mov [edx+(nf40_32-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_33-nf40_11)], bl
mov [edx+(nf40_34-nf40_11)], bh
add edx, nf40_51-nf40_11
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_51-nf40_51)], bl
mov [edx+(nf40_52-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_53-nf40_51)], bl
mov [edx+(nf40_54-nf40_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_71-nf40_51)], bl
mov [edx+(nf40_72-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_73-nf40_51)], bl
mov [edx+(nf40_74-nf40_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf40_0 ; flush prefetch
ALIGN 4
nf40_0:
nf40_11:mov ax, bx
shl eax, 16
nf40_12:mov ax, bx
mov [edi], eax
nf40_13:mov ax, bx
shl eax, 16
nf40_14:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_31:mov ax, bx
shl eax, 16
nf40_32:mov ax, bx
mov [edi], eax
nf40_33:mov ax, bx
shl eax, 16
nf40_34:mov ax, bx
mov [edi+4], eax
add edi, esi
mov eax, [esp]
mov cx, [eax+6]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf40_51:mov ax, bx
shl eax, 16
nf40_52:mov ax, bx
mov [edi], eax
nf40_53:mov ax, bx
shl eax, 16
nf40_54:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_71:mov ax, bx
shl eax, 16
nf40_72:mov ax, bx
mov [edi], eax
nf40_73:mov ax, bx
shl eax, 16
nf40_74:mov ax, bx
mov [edi+4], eax
pop esi
pop ebp
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
nf9: ; 8x8x2 (20 bytes)
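; 8x8x2: four color bytes followed by sixteen pattern bytes, two bits per
; pixel.  A descending first color pair ([esi] > [esi+1]) selects the
; nf41/nf57 variants; otherwise a descending second pair selects nf25.
; Only the pattern bytes for even lines are used in this half-height decode.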
mov eax, [esi]
cmp al, ah
ja nf41
shr eax, 16
cmp al, ah
ja nf25
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf9_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_11-nf9_11)], bl
mov [edx+(nf9_12-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_13-nf9_11)], bl
mov [edx+(nf9_14-nf9_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_15-nf9_11)], bl
mov [edx+(nf9_16-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_17-nf9_11)], bl
mov [edx+(nf9_18-nf9_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_31-nf9_11)], bl
mov [edx+(nf9_32-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_33-nf9_11)], bl
mov [edx+(nf9_34-nf9_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_35-nf9_11)], bl
mov [edx+(nf9_36-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_37-nf9_11)], bl
mov [edx+(nf9_38-nf9_11)], bh
lea edx, [edx+(nf9_51-nf9_11)]
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_51-nf9_51)], bl
mov [edx+(nf9_52-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_53-nf9_51)], bl
mov [edx+(nf9_54-nf9_51)], bh
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_55-nf9_51)], bl
mov [edx+(nf9_56-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_57-nf9_51)], bl
mov [edx+(nf9_58-nf9_51)], bh
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_71-nf9_51)], bl
mov [edx+(nf9_72-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_73-nf9_51)], bl
mov [edx+(nf9_74-nf9_51)], bh
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_75-nf9_51)], bl
mov [edx+(nf9_76-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_77-nf9_51)], bl
mov [edx+(nf9_78-nf9_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf9_0 ; flush prefetch
ALIGN 4
nf9_0:
nf9_11: mov al, bl
nf9_12: mov ah, bl
shl eax, 16
nf9_13: mov al, bl
nf9_14: mov ah, bl
mov [edi], eax
nf9_15: mov al, bl
nf9_16: mov ah, bl
shl eax, 16
nf9_17: mov al, bl
nf9_18: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_31: mov al, bl
nf9_32: mov ah, bl
shl eax, 16
nf9_33: mov al, bl
nf9_34: mov ah, bl
mov [edi], eax
nf9_35: mov al, bl
nf9_36: mov ah, bl
shl eax, 16
nf9_37: mov al, bl
nf9_38: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_51: mov al, bl
nf9_52: mov ah, bl
shl eax, 16
nf9_53: mov al, bl
nf9_54: mov ah, bl
mov [edi], eax
nf9_55: mov al, bl
nf9_56: mov ah, bl
shl eax, 16
nf9_57: mov al, bl
nf9_58: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_71: mov al, bl
nf9_72: mov ah, bl
shl eax, 16
nf9_73: mov al, bl
nf9_74: mov ah, bl
mov [edi], eax
nf9_75: mov al, bl
nf9_76: mov ah, bl
shl eax, 16
nf9_77: mov al, bl
nf9_78: mov ah, bl
mov [edi+4], eax
add esi, 20
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
;nf9+16
nf25: ; low 4x4x2 (8 bytes)
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 8
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf25_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_14-nf25_11)], bl
mov [edx+(nf25_13-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_12-nf25_11)], bl
mov [edx+(nf25_11-nf25_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_24-nf25_11)], bl
mov [edx+(nf25_23-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_22-nf25_11)], bl
mov [edx+(nf25_21-nf25_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_34-nf25_11)], bl
mov [edx+(nf25_33-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_32-nf25_11)], bl
mov [edx+(nf25_31-nf25_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_44-nf25_11)], bl
mov [edx+(nf25_43-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_42-nf25_11)], bl
mov [edx+(nf25_41-nf25_11)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf25_0 ; flush prefetch
ALIGN 4
nf25_0:
nf25_11:mov ah, bl
mov al, ah
shl eax, 16
nf25_12:mov al, bl
mov ah, al
mov [edi], eax
nf25_13:mov ah, bl
mov al, ah
shl eax, 16
nf25_14:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf25_21:mov ah, bl
mov al, ah
shl eax, 16
nf25_22:mov al, bl
mov ah, al
mov [edi], eax
nf25_23:mov ah, bl
mov al, ah
shl eax, 16
nf25_24:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf25_31:mov ah, bl
mov al, ah
shl eax, 16
nf25_32:mov al, bl
mov ah, al
mov [edi], eax
nf25_33:mov ah, bl
mov al, ah
shl eax, 16
nf25_34:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf25_41:mov ah, bl
mov al, ah
shl eax, 16
nf25_42:mov al, bl
mov ah, al
mov [edi], eax
nf25_43:mov ah, bl
mov al, ah
shl eax, 16
nf25_44:mov al, bl
mov ah, al
mov [edi+4], eax
add esi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+32
nf41: ; low 4x8x2 (12 bytes)
shr eax, 16
cmp al, ah
ja nf57
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf41_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_14-nf41_11)], bl
mov [edx+(nf41_13-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_12-nf41_11)], bl
mov [edx+(nf41_11-nf41_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_34-nf41_11)], bl
mov [edx+(nf41_33-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_32-nf41_11)], bl
mov [edx+(nf41_31-nf41_11)], bh
lea edx, [edx+(nf41_51-nf41_11)]
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_54-nf41_51)], bl
mov [edx+(nf41_53-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_52-nf41_51)], bl
mov [edx+(nf41_51-nf41_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_74-nf41_51)], bl
mov [edx+(nf41_73-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_72-nf41_51)], bl
mov [edx+(nf41_71-nf41_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf41_0 ; flush prefetch
ALIGN 4
nf41_0:
nf41_11:mov ah, bl
mov al, ah
shl eax, 16
nf41_12:mov al, bl
mov ah, al
mov [edi], eax
nf41_13:mov ah, bl
mov al, ah
shl eax, 16
nf41_14:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_31:mov ah, bl
mov al, ah
shl eax, 16
nf41_32:mov al, bl
mov ah, al
mov [edi], eax
nf41_33:mov ah, bl
mov al, ah
shl eax, 16
nf41_34:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_51:mov ah, bl
mov al, ah
shl eax, 16
nf41_52:mov al, bl
mov ah, al
mov [edi], eax
nf41_53:mov ah, bl
mov al, ah
shl eax, 16
nf41_54:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf41_71:mov ah, bl
mov al, ah
shl eax, 16
nf41_72:mov al, bl
mov ah, al
mov [edi], eax
nf41_73:mov ah, bl
mov al, ah
shl eax, 16
nf41_74:mov al, bl
mov ah, al
mov [edi+4], eax
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+48
nf57: ; low 8x4x2 (12 bytes)
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf57_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_11-nf57_11)], bl
mov [edx+(nf57_12-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_13-nf57_11)], bl
mov [edx+(nf57_14-nf57_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_15-nf57_11)], bl
mov [edx+(nf57_16-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_17-nf57_11)], bl
mov [edx+(nf57_18-nf57_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_21-nf57_11)], bl
mov [edx+(nf57_22-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_23-nf57_11)], bl
mov [edx+(nf57_24-nf57_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_25-nf57_11)], bl
mov [edx+(nf57_26-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_27-nf57_11)], bl
mov [edx+(nf57_28-nf57_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_31-nf57_11)], bl
mov [edx+(nf57_32-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_33-nf57_11)], bl
mov [edx+(nf57_34-nf57_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_35-nf57_11)], bl
mov [edx+(nf57_36-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_37-nf57_11)], bl
mov [edx+(nf57_38-nf57_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_41-nf57_11)], bl
mov [edx+(nf57_42-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_43-nf57_11)], bl
mov [edx+(nf57_44-nf57_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_45-nf57_11)], bl
mov [edx+(nf57_46-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_47-nf57_11)], bl
mov [edx+(nf57_48-nf57_11)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf57_0 ; flush prefetch
ALIGN 4
nf57_0:
nf57_11:mov al, bl
nf57_12:mov ah, bl
shl eax, 16
nf57_13:mov al, bl
nf57_14:mov ah, bl
mov [edi], eax
nf57_15:mov al, bl
nf57_16:mov ah, bl
shl eax, 16
nf57_17:mov al, bl
nf57_18:mov ah, bl
mov [edi+4], eax
add edi, edx
nf57_21:mov al, bl
nf57_22:mov ah, bl
shl eax, 16
nf57_23:mov al, bl
nf57_24:mov ah, bl
mov [edi], eax
nf57_25:mov al, bl
nf57_26:mov ah, bl
shl eax, 16
nf57_27:mov al, bl
nf57_28:mov ah, bl
mov [edi+4], eax
add edi, edx
nf57_31:mov al, bl
nf57_32:mov ah, bl
shl eax, 16
nf57_33:mov al, bl
nf57_34:mov ah, bl
mov [edi], eax
nf57_35:mov al, bl
nf57_36:mov ah, bl
shl eax, 16
nf57_37:mov al, bl
nf57_38:mov ah, bl
mov [edi+4], eax
add edi, edx
nf57_41:mov al, bl
nf57_42:mov ah, bl
shl eax, 16
nf57_43:mov al, bl
nf57_44:mov ah, bl
mov [edi], eax
nf57_45:mov al, bl
nf57_46:mov ah, bl
shl eax, 16
nf57_47:mov al, bl
nf57_48:mov ah, bl
mov [edi+4], eax
add esi, 12
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
nf10: ; 2x2 4x4x2 (32 bytes)
mov ax, [esi]
cmp al, ah
ja nf26
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf10_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_11-nf10_11)], bl
mov [edx+(nf10_12-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_13-nf10_11)], bl
mov [edx+(nf10_14-nf10_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_21-nf10_11)], bl
mov [edx+(nf10_22-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_23-nf10_11)], bl
mov [edx+(nf10_24-nf10_11)], bh
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_31-nf10_11)], bl
mov [edx+(nf10_32-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_33-nf10_11)], bl
mov [edx+(nf10_34-nf10_11)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_41-nf10_11)], bl
mov [edx+(nf10_42-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_43-nf10_11)], bl
mov [edx+(nf10_44-nf10_11)], bh
lea edx, [edx+(nf10_51-nf10_11)]
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_51-nf10_51)], bl
mov [edx+(nf10_52-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_53-nf10_51)], bl
mov [edx+(nf10_54-nf10_51)], bh
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_61-nf10_51)], bl
mov [edx+(nf10_62-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_63-nf10_51)], bl
mov [edx+(nf10_64-nf10_51)], bh
mov al, [esi+28]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_71-nf10_51)], bl
mov [edx+(nf10_72-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_73-nf10_51)], bl
mov [edx+(nf10_74-nf10_51)], bh
mov al, [esi+30]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_81-nf10_51)], bl
mov [edx+(nf10_82-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_83-nf10_51)], bl
mov [edx+(nf10_84-nf10_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf10_0 ; flush prefetch
ALIGN 4
nf10_0:
nf10_11:mov al, bl
nf10_12:mov ah, bl
shl eax, 16
nf10_13:mov al, bl
nf10_14:mov ah, bl
mov [edi], eax
add edi, edx
nf10_21:mov al, bl
nf10_22:mov ah, bl
shl eax, 16
nf10_23:mov al, bl
nf10_24:mov ah, bl
mov [edi], eax
add edi, edx
; Load bl,bh,cl,ch with four colors
mov bx, [esi+8]
mov cx, [esi+10]
nf10_31:mov al, bl
nf10_32:mov ah, bl
shl eax, 16
nf10_33:mov al, bl
nf10_34:mov ah, bl
mov [edi], eax
add edi, edx
nf10_41:mov al, bl
nf10_42:mov ah, bl
shl eax, 16
nf10_43:mov al, bl
nf10_44:mov ah, bl
mov [edi], eax
add edi, edx
lea eax, [edx*4-4]
sub edi, eax
; Load bl,bh,cl,ch with four colors
mov bx, [esi+16]
mov cx, [esi+18]
nf10_51:mov al, bl
nf10_52:mov ah, bl
shl eax, 16
nf10_53:mov al, bl
nf10_54:mov ah, bl
mov [edi], eax
add edi, edx
nf10_61:mov al, bl
nf10_62:mov ah, bl
shl eax, 16
nf10_63:mov al, bl
nf10_64:mov ah, bl
mov [edi], eax
add edi, edx
; Load bl,bh,cl,ch with four colors
mov bx, [esi+24]
mov cx, [esi+26]
nf10_71:mov al, bl
nf10_72:mov ah, bl
shl eax, 16
nf10_73:mov al, bl
nf10_74:mov ah, bl
mov [edi], eax
add edi, edx
nf10_81:mov al, bl
nf10_82:mov ah, bl
shl eax, 16
nf10_83:mov al, bl
nf10_84:mov ah, bl
mov [edi], eax
add esi, 32
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+16
nf26: ; 2x1 4x8x2 (24 bytes)
mov ax, [esi+12]
cmp al, ah
ja nf42
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 24
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf26_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_11-nf26_11)], bl
mov [edx+(nf26_12-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_13-nf26_11)], bl
mov [edx+(nf26_14-nf26_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_21-nf26_11)], bl
mov [edx+(nf26_22-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_23-nf26_11)], bl
mov [edx+(nf26_24-nf26_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_31-nf26_11)], bl
mov [edx+(nf26_32-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_33-nf26_11)], bl
mov [edx+(nf26_34-nf26_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_41-nf26_11)], bl
mov [edx+(nf26_42-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_43-nf26_11)], bl
mov [edx+(nf26_44-nf26_11)], bh
lea edx, [edx+(nf26_51-nf26_11)]
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_51-nf26_51)], bl
mov [edx+(nf26_52-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_53-nf26_51)], bl
mov [edx+(nf26_54-nf26_51)], bh
mov al, [esi+18]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_61-nf26_51)], bl
mov [edx+(nf26_62-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_63-nf26_51)], bl
mov [edx+(nf26_64-nf26_51)], bh
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_71-nf26_51)], bl
mov [edx+(nf26_72-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_73-nf26_51)], bl
mov [edx+(nf26_74-nf26_51)], bh
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_81-nf26_51)], bl
mov [edx+(nf26_82-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_83-nf26_51)], bl
mov [edx+(nf26_84-nf26_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf26_0 ; flush prefetch
ALIGN 4
nf26_0:
nf26_11:mov al, bl
nf26_12:mov ah, bl
shl eax, 16
nf26_13:mov al, bl
nf26_14:mov ah, bl
mov [edi], eax
add edi, edx
nf26_21:mov al, bl
nf26_22:mov ah, bl
shl eax, 16
nf26_23:mov al, bl
nf26_24:mov ah, bl
mov [edi], eax
add edi, edx
nf26_31:mov al, bl
nf26_32:mov ah, bl
shl eax, 16
nf26_33:mov al, bl
nf26_34:mov ah, bl
mov [edi], eax
add edi, edx
nf26_41:mov al, bl
nf26_42:mov ah, bl
shl eax, 16
nf26_43:mov al, bl
nf26_44:mov ah, bl
mov [edi], eax
add edi, edx
lea eax, [edx*4-4]
sub edi, eax
; Load bl,bh,cl,ch with four colors
mov bx, [esi+12]
mov cx, [esi+14]
nf26_51:mov al, bl
nf26_52:mov ah, bl
shl eax, 16
nf26_53:mov al, bl
nf26_54:mov ah, bl
mov [edi], eax
add edi, edx
nf26_61:mov al, bl
nf26_62:mov ah, bl
shl eax, 16
nf26_63:mov al, bl
nf26_64:mov ah, bl
mov [edi], eax
add edi, edx
nf26_71:mov al, bl
nf26_72:mov ah, bl
shl eax, 16
nf26_73:mov al, bl
nf26_74:mov ah, bl
mov [edi], eax
add edi, edx
nf26_81:mov al, bl
nf26_82:mov ah, bl
shl eax, 16
nf26_83:mov al, bl
nf26_84:mov ah, bl
mov [edi], eax
add esi, 24
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+32
nf42: ; 1x2 8x4x2 (24 bytes)
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 24
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf42_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_11-nf42_11)], bl
mov [edx+(nf42_12-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_13-nf42_11)], bl
mov [edx+(nf42_14-nf42_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_15-nf42_11)], bl
mov [edx+(nf42_16-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_17-nf42_11)], bl
mov [edx+(nf42_18-nf42_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_31-nf42_11)], bl
mov [edx+(nf42_32-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_33-nf42_11)], bl
mov [edx+(nf42_34-nf42_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_35-nf42_11)], bl
mov [edx+(nf42_36-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_37-nf42_11)], bl
mov [edx+(nf42_38-nf42_11)], bh
lea edx, [edx+(nf42_51-nf42_11)]
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_51-nf42_51)], bl
mov [edx+(nf42_52-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_53-nf42_51)], bl
mov [edx+(nf42_54-nf42_51)], bh
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_55-nf42_51)], bl
mov [edx+(nf42_56-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_57-nf42_51)], bl
mov [edx+(nf42_58-nf42_51)], bh
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_71-nf42_51)], bl
mov [edx+(nf42_72-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_73-nf42_51)], bl
mov [edx+(nf42_74-nf42_51)], bh
mov al, [esi+21]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_75-nf42_51)], bl
mov [edx+(nf42_76-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_77-nf42_51)], bl
mov [edx+(nf42_78-nf42_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf42_0 ; flush prefetch
ALIGN 4
nf42_0:
nf42_11:mov al, bl
nf42_12:mov ah, bl
shl eax, 16
nf42_13:mov al, bl
nf42_14:mov ah, bl
mov [edi], eax
nf42_15:mov al, bl
nf42_16:mov ah, bl
shl eax, 16
nf42_17:mov al, bl
nf42_18:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_31:mov al, bl
nf42_32:mov ah, bl
shl eax, 16
nf42_33:mov al, bl
nf42_34:mov ah, bl
mov [edi], eax
nf42_35:mov al, bl
nf42_36:mov ah, bl
shl eax, 16
nf42_37:mov al, bl
nf42_38:mov ah, bl
mov [edi+4], eax
add edi, edx
; Load bl,bh,cl,ch with four colors
mov bx, [esi+12]
mov cx, [esi+14]
nf42_51:mov al, bl
nf42_52:mov ah, bl
shl eax, 16
nf42_53:mov al, bl
nf42_54:mov ah, bl
mov [edi], eax
nf42_55:mov al, bl
nf42_56:mov ah, bl
shl eax, 16
nf42_57:mov al, bl
nf42_58:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_71:mov al, bl
nf42_72:mov ah, bl
shl eax, 16
nf42_73:mov al, bl
nf42_74:mov ah, bl
mov [edi], eax
nf42_75:mov al, bl
nf42_76:mov ah, bl
shl eax, 16
nf42_77:mov al, bl
nf42_78:mov ah, bl
mov [edi+4], eax
add esi, 24
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
nf11: ; 8x8x8 (64 bytes)
if 0 ;debug
add esi, 64
mov eax, 0fefefefeH
; mov ebx, eax
mov ebx, 0
jmp nf_solid
endif
mov edx, nf_width
mov eax, [esi] ;0
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
add edi, edx
mov eax, [esi+16] ;2
mov [edi], eax
mov eax, [esi+20]
mov [edi+4], eax
add edi, edx
mov eax, [esi+32] ;4
mov [edi], eax
mov eax, [esi+36]
mov [edi+4], eax
add edi, edx
mov eax, [esi+48] ;6
mov [edi], eax
mov eax, [esi+52]
mov [edi+4], eax
add esi, 64
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
nf12: ; low 4x4x8 (16 bytes)
mov edx, nf_width
mov eax, [esi]
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi], ebx
shr eax, 16
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi+4], ebx
add edi, edx
mov eax, [esi+4]
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi], ebx
shr eax, 16
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi+4], ebx
add edi, edx
mov eax, [esi+8]
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi], ebx
shr eax, 16
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi+4], ebx
add edi, edx
mov eax, [esi+12]
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi], ebx
shr eax, 16
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi+4], ebx
sub edi, nfpk_back_right
add esi, 16
retn
;----------------------------------------
ALIGN 4
nf13: ; 2x2 4x4x0 (4 bytes)
mov edx, nf_width
mov cl, [esi]
mov ch, cl
mov eax, ecx
shl eax, 16
mov ax, cx
mov cl, [esi+1]
mov ch, cl
mov ebx, ecx
shl ebx, 16
mov bx, cx
mov [edi], eax
mov [edi+4], ebx
mov [edi+edx], eax
mov [edi+edx+4], ebx
lea edi, [edi+edx*2]
mov cl, [esi+2]
mov ch, cl
mov eax, ecx
shl eax, 16
mov ax, cx
mov cl, [esi+3]
mov ch, cl
mov ebx, ecx
shl ebx, 16
mov bx, cx
mov [edi], eax
mov [edi+4], ebx
add edi, edx
mov [edi], eax
mov [edi+4], ebx
sub edi, nfpk_back_right
add esi, 4
retn
;----------------------------------------
ALIGN 4
nf14: ; 8x8x0 (1 byte)
mov bl, [esi] ; Copy color into 8 positions
inc esi
mov bh, bl
mov eax, ebx
shl eax, 16
mov ax, bx
mov ebx, eax
if 0 ;debug
mov eax, 080808080h
mov ebx, eax
endif
jmp nf_solid
retn
;----------------------------------------
ALIGN 4
nf15: ; mix 8x8x0 (2 bytes)
mov bx, [esi] ; Copy 2 colors into 8 positions
add esi, 2 ; in a checkerboard
mov ax, bx
shl eax, 16
mov ax, bx
mov ebx, eax
rol ebx, 8
if 0 ;debug
mov eax, 080808080h
mov ebx, eax
endif
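        ; nf_solid: fill the block with the patterns built in eax and ebx,
        ; alternating the two on successive rows (they are identical for nf14,
        ; byte-rotated for nf15's checkerboard).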
nf_solid:
mov edx, nf_width
mov [edi], eax
mov [edi+4], eax
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
add edi, edx
mov [edi], eax
mov [edi+4], eax
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
nfPkDecompH ENDP
if DECOMPD
; Half vertical resolution version (dither between lines)
;
nfPkDecompD PROC USES ESI EDI EBX, \
ops:PTRBYTE, comp:PTRBYTE, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD
LOCAL tbuf: PTRBYTE
LOCAL new_row:DWORD
LOCAL DiffBufPtrs:DWORD
LOCAL nfpk_back_right: DWORD
LOCAL wcnt:DWORD
LOG_LABEL "StartPkDecomp"
.data
nfpk_OpTblD label dword
dword offset nf0 ; Prev Same (0)
dword offset nf1 ; No change (and copied to screen) (0)
dword offset nf2 ; Near shift from older part of current buf (1)
dword offset nf3 ; Near shift from newer part of current buf (1)
dword offset nf4 ; Near shift from previous buffer (1)
dword offset nf5 ; Far shift from previous buffer (2)
dword offset nf6 ; Far shift from current buffer (2)
; [Or if COMPOPS, run of no changes (0)]
dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
        dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 8x4x2 (24 bytes)
dword offset nf11 ; 8x8x8 (64 bytes)
dword offset nf12 ; low 4x4x8 (16 bytes)
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
dword offset nf14 ; 8x8x0 (1 byte)
dword offset nf15 ; mix 8x8x0 (2 bytes)
.code
ifdef SYMANTEC
mov ebx, ds ; Allow DS to access code
mov ecx, 0
mov ax, 3505h
int 21h
endif
NF_DECOMP_INIT 0
mov eax, nf_width
shl eax, 2
sub eax, nf_new_w
mov new_row, eax
shr nf_new_h, 1
mov eax, nf_width
lea eax, [eax*2+eax-SWIDTH]
mov nfpk_back_right, eax
mov esi, comp
mov edi, tbuf
nf_StartRow:
mov eax, w
shr eax, 1
mov wcnt,eax
ALIGN 4
nf_NextPair:
dec wcnt
js nf_NextRow
mov ebx, ops
mov al, [ebx]
inc ebx
mov ops, ebx
xor ebx, ebx
mov bl, al
shr bl, 4
and eax, 0Fh
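        ; Each ops byte packs two 4-bit opcodes, one per square of the pair.
        ; The low-nibble handler is entered via the jmp; its retn pops the
        ; high-nibble handler pushed below, whose retn returns to nf_NextPair.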
push offset nf_NextPair
push nfpk_OpTblD[ebx*4]
jmp nfpk_OpTblD[eax*4]
nf_NextRow:
add edi, new_row
dec h
jnz nf_StartRow
LOG_LABEL "EndPkDecomp"
ifdef SYMANTEC
mov ebx, ds ; Disable DS from accessing code
mov ecx, offset DGROUP:_data_bottom[-1]
mov ax, 3505h
int 21h
endif
ret
;----------------------------------------
ALIGN 4
nf0: ; No change from previous buffer
mov eax, DiffBufPtrs
jmp nf_shiftr
;----------------------------------------
ALIGN 4
nf1: ; No change (and copied to screen)
add edi, SWIDTH
retn
;----------------------------------------
ALIGN 4
nf2: ; Near shift from older part of current buffer
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP2[eax*2]
nf_xyc_shift:
xor ebx, ebx
mov bl, ah
shl eax, 24
sar eax, 24
sar bl, 1
pushf
add eax, nfpk_ShiftY[ebx*4]
jmp nf_shift
;----------------------------------------
ALIGN 4
nf3: ; Near shift from newer part of current buffer
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP2[eax*2]
neg al
neg ah
jmp nf_xyc_shift
;----------------------------------------
ALIGN 4
nf4: ; Near shift from previous buffer
xor eax, eax
mov al, [esi]
inc esi
mov ax, nfpk_ShiftP1[eax*2]
jmp nf_xyp_shift
;----------------------------------------
ALIGN 4
nf5: ; Far shift from previous buffer
mov ax, [esi]
add esi, 2
nf_xyp_shift:
xor ebx, ebx
mov bl, ah
shl eax, 24
sar eax, 24
sar bl, 1
pushf
add eax, nfpk_ShiftY[ebx*4]
add eax, DiffBufPtrs
jmp nf_shift
;----------------------------------------
ALIGN 4
if COMPOPS
nf6: ; Run of no changes (must only appear in first nibble opcodes)
; Next nibble k specifies 2k+4 squares with no changes
add esp, 4 ; Next nibble is not an opcode
add ebx, 2 ; (minimum of 4 squares)
ALIGN 4
nf6a: add edi, SWIDTH*2 ; Advance over two squares
dec ebx
jz nf6z ; Last pair of squares
dec wcnt ; Same row?
jns nf6a ; Yes
add edi, new_row ; Advance to next row
dec h ; Decrement row count (should never become zero here)
mov eax, w ; Reset wcnt
        shr eax, 1
dec eax
mov wcnt, eax
jmp nf6a
nf6z: retn
else
nf6: ; Far shift from current buffer
mov ax, [esi]
add esi, 2
jmp nf_xyc_shift
endif
;----------------------------------------
ALIGN 4
nf_shift:
popf
if 0 ;debug
mov eax, 0
mov ebx, eax
jmp nf_solid
endif
jc nf_shiftd
nf_shiftr:
mov ebx, esi ; save esi
lea esi, [edi+eax]
mov edx, nf_width
REPEAT 3
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
add esi, edx
add edi, edx
ENDM
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
mov esi, ebx ; restore esi
retn
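        ; Dithered variant of the copy above: the byte positions selected by the
        ; 000ff00ffH mask are taken from the row below the source row, the rest
        ; from the source row itself, merging two source rows into each
        ; half-resolution output row.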
nf_shiftd:
push esi
lea esi, [edi+eax]
mov edx, nf_width
mov ebx, 000ff00ffH
REPEAT 3
mov eax, [esi]
mov ecx, eax
xor ecx, [esi+edx]
and ecx, ebx
xor eax, ecx
mov [edi], eax
mov eax, [esi+4]
mov ecx, eax
xor ecx, [esi+edx+4]
and ecx, ebx
xor eax, ecx
mov [edi+4], eax
add esi, edx
add edi, edx
ENDM
mov eax, [esi]
mov ecx, eax
xor ecx, [esi+edx]
and ecx, ebx
xor eax, ecx
mov [edi], eax
mov eax, [esi+4]
mov ecx, eax
xor ecx, [esi+edx+4]
and ecx, ebx
xor eax, ecx
mov [edi+4], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
pop esi
retn
;----------------------------------------
ALIGN 4
nf7: ; 8x8x1 (10 bytes)
mov ax, [esi]
cmp al, ah
ja nf23
if 0 ;debug
add esi, 10
mov eax, 0fefefefeH
mov ebx, eax
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf7_11+2
mov al, [esi+2]
mov bl, al
xor bl, [esi+3]
and bl, 0aaH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf7_11-nf7_11)], bl
mov [edx+(nf7_12-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_13-nf7_11)], bl
mov [edx+(nf7_14-nf7_11)], bh
mov al, [esi+4]
mov bl, al
xor bl, [esi+5]
and bl, 0aaH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf7_31-nf7_11)], bl
mov [edx+(nf7_32-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_33-nf7_11)], bl
mov [edx+(nf7_34-nf7_11)], bh
lea edx, [edx+(nf7_51-nf7_11)]
mov al, [esi+6]
mov bl, al
xor bl, [esi+7]
and bl, 0aaH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf7_51-nf7_51)], bl
mov [edx+(nf7_52-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_53-nf7_51)], bl
mov [edx+(nf7_54-nf7_51)], bh
mov al, [esi+8]
mov bl, al
xor bl, [esi+9]
and bl, 0aaH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf7_71-nf7_51)], bl
mov [edx+(nf7_72-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_73-nf7_51)], bl
mov [edx+(nf7_74-nf7_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi,nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf7_0 ; flush prefetch
ALIGN 4
nf7_0:
nf7_11: mov ax, bx
shl eax, 16
nf7_12: mov ax, bx
mov [edi], eax
nf7_13: mov ax, bx
shl eax, 16
nf7_14: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_31: mov ax, bx
shl eax, 16
nf7_32: mov ax, bx
mov [edi], eax
nf7_33: mov ax, bx
shl eax, 16
nf7_34: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_51: mov ax, bx
shl eax, 16
nf7_52: mov ax, bx
mov [edi], eax
nf7_53: mov ax, bx
shl eax, 16
nf7_54: mov ax, bx
mov [edi+4], eax
add edi, esi
nf7_71: mov ax, bx
shl eax, 16
nf7_72: mov ax, bx
mov [edi], eax
nf7_73: mov ax, bx
shl eax, 16
nf7_74: mov ax, bx
mov [edi+4], eax
pop esi
pop ebp
add esi, 10
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf7+16
nf23: ; low 4x4x1 (4 bytes)
xor eax, eax
lea ecx, nfpk_mov4l
lea edx, byte ptr ds:nf23_11+2
mov al, [esi+2]
and al, 0fH
mov ebx, [ecx+eax*4]
mov [edx+(nf23_11-nf23_11)], bl
mov [edx+(nf23_12-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_13-nf23_11)], bl
mov [edx+(nf23_14-nf23_11)], bh
mov al, [esi+2]
shr al, 4
mov ebx, [ecx+eax*4]
mov [edx+(nf23_31-nf23_11)], bl
mov [edx+(nf23_32-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_33-nf23_11)], bl
mov [edx+(nf23_34-nf23_11)], bh
mov al, [esi+3]
and al, 0fH
mov ebx, [ecx+eax*4]
mov [edx+(nf23_51-nf23_11)], bl
mov [edx+(nf23_52-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_53-nf23_11)], bl
mov [edx+(nf23_54-nf23_11)], bh
mov al, [esi+3]
shr al, 4
mov ebx, [ecx+eax*4]
mov [edx+(nf23_71-nf23_11)], bl
mov [edx+(nf23_72-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_73-nf23_11)], bl
mov [edx+(nf23_74-nf23_11)], bh
mov edx, nf_width
; load bx,cx with 00,11 color combinations
mov bx, [esi]
mov cl, bh
mov bh, bl
mov ch, cl
jmp nf23_0 ; flush prefetch
ALIGN 4
nf23_0:
nf23_11:mov ax, bx
shl eax, 16
nf23_12:mov ax, bx
mov [edi], eax
nf23_13:mov ax, bx
shl eax, 16
nf23_14:mov ax, bx
mov [edi+4], eax
add edi, edx
nf23_31:mov ax, bx
shl eax, 16
nf23_32:mov ax, bx
mov [edi], eax
nf23_33:mov ax, bx
shl eax, 16
nf23_34:mov ax, bx
mov [edi+4], eax
add edi, edx
nf23_51:mov ax, bx
shl eax, 16
nf23_52:mov ax, bx
mov [edi], eax
nf23_53:mov ax, bx
shl eax, 16
nf23_54:mov ax, bx
mov [edi+4], eax
add edi, edx
nf23_71:mov ax, bx
shl eax, 16
nf23_72:mov ax, bx
mov [edi], eax
nf23_73:mov ax, bx
shl eax, 16
nf23_74:mov ax, bx
mov [edi+4], eax
sub edi, nfpk_back_right
add esi, 4
retn
;----------------------------------------
ALIGN 4
nf8: ; 2x2 4x4x1 (16 bytes)
mov ax, [esi]
cmp al, ah
ja nf24
; <WIP> Note: This could be made faster with a new (16 16-bit entry) table.
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf8_11+2
mov al, [esi+2]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf8_11-nf8_11)], bl
mov [edx+(nf8_12-nf8_11)], bh
mov al, [esi+3]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf8_21-nf8_11)], bl
mov [edx+(nf8_22-nf8_11)], bh
mov al, [esi+6]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf8_31-nf8_11)], bl
mov [edx+(nf8_32-nf8_11)], bh
mov al, [esi+7]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf8_41-nf8_11)], bl
mov [edx+(nf8_42-nf8_11)], bh
add edx, nf8_51-nf8_11
mov al, [esi+10]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf8_51-nf8_51)], bl
mov [edx+(nf8_52-nf8_51)], bh
mov al, [esi+11]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf8_61-nf8_51)], bl
mov [edx+(nf8_62-nf8_51)], bh
mov al, [esi+14]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf8_71-nf8_51)], bl
mov [edx+(nf8_72-nf8_51)], bh
mov al, [esi+15]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf8_81-nf8_51)], bl
mov [edx+(nf8_82-nf8_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf8_0 ; flush prefetch
ALIGN 4
nf8_0:
nf8_11: mov ax, bx
shl eax, 16
nf8_12: mov ax, bx
mov [edi], eax
add edi, esi
nf8_21: mov ax, bx
shl eax, 16
nf8_22: mov ax, bx
mov [edi], eax
add edi, esi
mov eax, [esp]
mov cx, [eax+4]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf8_31: mov ax, bx
shl eax, 16
nf8_32: mov ax, bx
mov [edi], eax
add edi, esi
nf8_41: mov ax, bx
shl eax, 16
nf8_42: mov ax, bx
mov [edi], eax
add edi, esi
lea eax, [esi*4-4]
sub edi, eax
mov eax, [esp]
mov cx, [eax+8]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf8_51: mov ax, bx
shl eax, 16
nf8_52: mov ax, bx
mov [edi], eax
add edi, esi
nf8_61: mov ax, bx
shl eax, 16
nf8_62: mov ax, bx
mov [edi], eax
add edi, esi
mov eax, [esp]
mov cx, [eax+12]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf8_71: mov ax, bx
shl eax, 16
nf8_72: mov ax, bx
mov [edi], eax
add edi, esi
nf8_81: mov ax, bx
shl eax, 16
nf8_82: mov ax, bx
mov [edi], eax
pop esi
pop ebp
add esi, 16
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+16
nf24: ; 2x1 4x8x1 (12 bytes)
mov ax, [esi+6]
cmp al, ah
ja nf40
; <WIP> Note: This could be made faster with a new (16 16-bit entry) table.
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf24_11+2
mov al, [esi+2]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf24_11-nf24_11)], bl
mov [edx+(nf24_12-nf24_11)], bh
mov al, [esi+3]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf24_21-nf24_11)], bl
mov [edx+(nf24_22-nf24_11)], bh
mov al, [esi+4]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf24_31-nf24_11)], bl
mov [edx+(nf24_32-nf24_11)], bh
mov al, [esi+5]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf24_41-nf24_11)], bl
mov [edx+(nf24_42-nf24_11)], bh
add edx, nf24_51-nf24_11
mov al, [esi+8]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf24_51-nf24_51)], bl
mov [edx+(nf24_52-nf24_51)], bh
mov al, [esi+9]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf24_61-nf24_51)], bl
mov [edx+(nf24_62-nf24_51)], bh
mov al, [esi+10]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf24_71-nf24_51)], bl
mov [edx+(nf24_72-nf24_51)], bh
mov al, [esi+11]
mov bl, al
shr bl, 4
xor bl, al
and bl, 0aH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf24_81-nf24_51)], bl
mov [edx+(nf24_82-nf24_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf24_0 ; flush prefetch
ALIGN 4
nf24_0:
nf24_11:mov ax, bx
shl eax, 16
nf24_12:mov ax, bx
mov [edi], eax
add edi, esi
nf24_21:mov ax, bx
shl eax, 16
nf24_22:mov ax, bx
mov [edi], eax
add edi, esi
nf24_31:mov ax, bx
shl eax, 16
nf24_32:mov ax, bx
mov [edi], eax
add edi, esi
nf24_41:mov ax, bx
shl eax, 16
nf24_42:mov ax, bx
mov [edi], eax
add edi, esi
lea eax, [esi*4-4]
sub edi, eax
mov eax, [esp]
mov cx, [eax+6]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf24_51:mov ax, bx
shl eax, 16
nf24_52:mov ax, bx
mov [edi], eax
add edi, esi
nf24_61:mov ax, bx
shl eax, 16
nf24_62:mov ax, bx
mov [edi], eax
add edi, esi
nf24_71:mov ax, bx
shl eax, 16
nf24_72:mov ax, bx
mov [edi], eax
add edi, esi
nf24_81:mov ax, bx
shl eax, 16
nf24_82:mov ax, bx
mov [edi], eax
pop esi
pop ebp
add esi, 12
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+32
nf40: ; 1x2 8x4x1 (12 bytes)
xor eax, eax
lea ecx, nfpk_mov8
lea edx, byte ptr ds:nf40_11+2
mov al, [esi+2]
mov bl, al
xor bl, [esi+3]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf40_11-nf40_11)], bl
mov [edx+(nf40_12-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_13-nf40_11)], bl
mov [edx+(nf40_14-nf40_11)], bh
mov al, [esi+4]
mov bl, al
xor bl, [esi+5]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf40_31-nf40_11)], bl
mov [edx+(nf40_32-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_33-nf40_11)], bl
mov [edx+(nf40_34-nf40_11)], bh
add edx, nf40_51-nf40_11
mov al, [esi+8]
mov bl, al
xor bl, [esi+9]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf40_51-nf40_51)], bl
mov [edx+(nf40_52-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_53-nf40_51)], bl
mov [edx+(nf40_54-nf40_51)], bh
mov al, [esi+10]
mov bl, al
xor bl, [esi+11]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf40_71-nf40_51)], bl
mov [edx+(nf40_72-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_73-nf40_51)], bl
mov [edx+(nf40_74-nf40_51)], bh
push ebp
push esi
; load bx,dx,cx,bp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
mov cx, [esi]
mov esi, nf_width
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
jmp nf40_0 ; flush prefetch
ALIGN 4
nf40_0:
nf40_11:mov ax, bx
shl eax, 16
nf40_12:mov ax, bx
mov [edi], eax
nf40_13:mov ax, bx
shl eax, 16
nf40_14:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_31:mov ax, bx
shl eax, 16
nf40_32:mov ax, bx
mov [edi], eax
nf40_33:mov ax, bx
shl eax, 16
nf40_34:mov ax, bx
mov [edi+4], eax
add edi, esi
mov eax, [esp]
mov cx, [eax+6]
mov bl,cl
mov bh,cl
mov dl,ch
mov dh,cl
mov al,ch
mov ah,ch
mov ebp,eax
nf40_51:mov ax, bx
shl eax, 16
nf40_52:mov ax, bx
mov [edi], eax
nf40_53:mov ax, bx
shl eax, 16
nf40_54:mov ax, bx
mov [edi+4], eax
add edi, esi
nf40_71:mov ax, bx
shl eax, 16
nf40_72:mov ax, bx
mov [edi], eax
nf40_73:mov ax, bx
shl eax, 16
nf40_74:mov ax, bx
mov [edi+4], eax
pop esi
pop ebp
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
nf9: ; 8x8x2 (20 bytes)
mov eax, [esi]
cmp al, ah
ja nf41
shr eax, 16
cmp al, ah
ja nf25
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf9_11+1
mov al, [esi+4]
mov bl, al
xor bl, [esi+6]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf9_11-nf9_11)], bl
mov [edx+(nf9_12-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_13-nf9_11)], bl
mov [edx+(nf9_14-nf9_11)], bh
mov al, [esi+5]
mov bl, al
xor bl, [esi+7]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf9_15-nf9_11)], bl
mov [edx+(nf9_16-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_17-nf9_11)], bl
mov [edx+(nf9_18-nf9_11)], bh
mov al, [esi+8]
mov bl, al
xor bl, [esi+10]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf9_31-nf9_11)], bl
mov [edx+(nf9_32-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_33-nf9_11)], bl
mov [edx+(nf9_34-nf9_11)], bh
mov al, [esi+9]
mov bl, al
xor bl, [esi+11]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf9_35-nf9_11)], bl
mov [edx+(nf9_36-nf9_11)], bh
shr ebx, 16
mov [edx+(nf9_37-nf9_11)], bl
mov [edx+(nf9_38-nf9_11)], bh
lea edx, [edx+(nf9_51-nf9_11)]
mov al, [esi+12]
mov bl, al
xor bl, [esi+14]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf9_51-nf9_51)], bl
mov [edx+(nf9_52-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_53-nf9_51)], bl
mov [edx+(nf9_54-nf9_51)], bh
mov al, [esi+13]
mov bl, al
xor bl, [esi+15]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf9_55-nf9_51)], bl
mov [edx+(nf9_56-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_57-nf9_51)], bl
mov [edx+(nf9_58-nf9_51)], bh
mov al, [esi+16]
mov bl, al
xor bl, [esi+18]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf9_71-nf9_51)], bl
mov [edx+(nf9_72-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_73-nf9_51)], bl
mov [edx+(nf9_74-nf9_51)], bh
mov al, [esi+17]
mov bl, al
xor bl, [esi+19]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf9_75-nf9_51)], bl
mov [edx+(nf9_76-nf9_51)], bh
shr ebx, 16
mov [edx+(nf9_77-nf9_51)], bl
mov [edx+(nf9_78-nf9_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf9_0 ; flush prefetch
ALIGN 4
nf9_0:
nf9_11: mov al, bl
nf9_12: mov ah, bl
shl eax, 16
nf9_13: mov al, bl
nf9_14: mov ah, bl
mov [edi], eax
nf9_15: mov al, bl
nf9_16: mov ah, bl
shl eax, 16
nf9_17: mov al, bl
nf9_18: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_31: mov al, bl
nf9_32: mov ah, bl
shl eax, 16
nf9_33: mov al, bl
nf9_34: mov ah, bl
mov [edi], eax
nf9_35: mov al, bl
nf9_36: mov ah, bl
shl eax, 16
nf9_37: mov al, bl
nf9_38: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_51: mov al, bl
nf9_52: mov ah, bl
shl eax, 16
nf9_53: mov al, bl
nf9_54: mov ah, bl
mov [edi], eax
nf9_55: mov al, bl
nf9_56: mov ah, bl
shl eax, 16
nf9_57: mov al, bl
nf9_58: mov ah, bl
mov [edi+4], eax
add edi, edx
nf9_71: mov al, bl
nf9_72: mov ah, bl
shl eax, 16
nf9_73: mov al, bl
nf9_74: mov ah, bl
mov [edi], eax
nf9_75: mov al, bl
nf9_76: mov ah, bl
shl eax, 16
nf9_77: mov al, bl
nf9_78: mov ah, bl
mov [edi+4], eax
add esi, 20
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
;nf9+16
nf25: ; low 4x4x2 (8 bytes)
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 8
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf25_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_14-nf25_11)], bl
mov [edx+(nf25_13-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_12-nf25_11)], bl
mov [edx+(nf25_11-nf25_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_24-nf25_11)], bl
mov [edx+(nf25_23-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_22-nf25_11)], bl
mov [edx+(nf25_21-nf25_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_34-nf25_11)], bl
mov [edx+(nf25_33-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_32-nf25_11)], bl
mov [edx+(nf25_31-nf25_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_44-nf25_11)], bl
mov [edx+(nf25_43-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_42-nf25_11)], bl
mov [edx+(nf25_41-nf25_11)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf25_0 ; flush prefetch
ALIGN 4
nf25_0:
nf25_11:mov ah, bl
mov al, ah
shl eax, 16
nf25_12:mov al, bl
mov ah, al
mov [edi], eax
nf25_13:mov ah, bl
mov al, ah
shl eax, 16
nf25_14:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf25_21:mov ah, bl
mov al, ah
shl eax, 16
nf25_22:mov al, bl
mov ah, al
mov [edi], eax
nf25_23:mov ah, bl
mov al, ah
shl eax, 16
nf25_24:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf25_31:mov ah, bl
mov al, ah
shl eax, 16
nf25_32:mov al, bl
mov ah, al
mov [edi], eax
nf25_33:mov ah, bl
mov al, ah
shl eax, 16
nf25_34:mov al, bl
mov ah, al
mov [edi+4], eax
add edi, edx
nf25_41:mov ah, bl
mov al, ah
shl eax, 16
nf25_42:mov al, bl
mov ah, al
mov [edi], eax
nf25_43:mov ah, bl
mov al, ah
shl eax, 16
nf25_44:mov al, bl
mov ah, al
mov [edi+4], eax
add esi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+32
nf41: ; low 4x8x2 (12 bytes)
shr eax, 16
cmp al, ah
ja nf57
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 12
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf41_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_14-nf41_11)], bl
mov [edx+(nf41_13-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_12-nf41_11)], bl
mov [edx+(nf41_11-nf41_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_24-nf41_11)], bl
mov [edx+(nf41_23-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_22-nf41_11)], bl
mov [edx+(nf41_21-nf41_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_34-nf41_11)], bl
mov [edx+(nf41_33-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_32-nf41_11)], bl
mov [edx+(nf41_31-nf41_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_44-nf41_11)], bl
mov [edx+(nf41_43-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_42-nf41_11)], bl
mov [edx+(nf41_41-nf41_11)], bh
lea edx, [edx+(nf41_51-nf41_11)]
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_54-nf41_51)], bl
mov [edx+(nf41_53-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_52-nf41_51)], bl
mov [edx+(nf41_51-nf41_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_64-nf41_51)], bl
mov [edx+(nf41_63-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_62-nf41_51)], bl
mov [edx+(nf41_61-nf41_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_74-nf41_51)], bl
mov [edx+(nf41_73-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_72-nf41_51)], bl
mov [edx+(nf41_71-nf41_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_84-nf41_51)], bl
mov [edx+(nf41_83-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_82-nf41_51)], bl
mov [edx+(nf41_81-nf41_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf41_0 ; flush prefetch
ALIGN 4
nf41_0:
nf41_11:mov ah, bl
mov al, ah
nf41_21:mov ah, bl
shl eax, 16
nf41_22:mov al, bl
mov ah, al
nf41_12:mov al, bl
mov [edi], eax
nf41_13:mov ah, bl
mov al, ah
nf41_23:mov ah, bl
shl eax, 16
nf41_24:mov al, bl
mov ah, al
nf41_14:mov al, bl
mov [edi+4], eax
add edi, edx
nf41_31:mov ah, bl
mov al, ah
nf41_41:mov ah, bl
shl eax, 16
nf41_42:mov al, bl
mov ah, al
nf41_32:mov al, bl
mov [edi], eax
nf41_33:mov ah, bl
mov al, ah
nf41_43:mov ah, bl
shl eax, 16
nf41_44:mov al, bl
mov ah, al
nf41_34:mov al, bl
mov [edi+4], eax
add edi, edx
nf41_51:mov ah, bl
mov al, ah
nf41_61:mov ah, bl
shl eax, 16
nf41_62:mov al, bl
mov ah, al
nf41_52:mov al, bl
mov [edi], eax
nf41_53:mov ah, bl
mov al, ah
nf41_63:mov ah, bl
shl eax, 16
nf41_64:mov al, bl
mov ah, al
nf41_54:mov al, bl
mov [edi+4], eax
add edi, edx
nf41_71:mov ah, bl
mov al, ah
nf41_81:mov ah, bl
shl eax, 16
nf41_82:mov al, bl
mov ah, al
nf41_72:mov al, bl
mov [edi], eax
nf41_73:mov ah, bl
mov al, ah
nf41_83:mov ah, bl
shl eax, 16
nf41_84:mov al, bl
mov ah, al
nf41_74:mov al, bl
mov [edi+4], eax
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+48
nf57: ; low 8x4x2 (12 bytes)
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf57_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_11-nf57_11)], bl
mov [edx+(nf57_12-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_13-nf57_11)], bl
mov [edx+(nf57_14-nf57_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_15-nf57_11)], bl
mov [edx+(nf57_16-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_17-nf57_11)], bl
mov [edx+(nf57_18-nf57_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_21-nf57_11)], bl
mov [edx+(nf57_22-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_23-nf57_11)], bl
mov [edx+(nf57_24-nf57_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_25-nf57_11)], bl
mov [edx+(nf57_26-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_27-nf57_11)], bl
mov [edx+(nf57_28-nf57_11)], bh
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_31-nf57_11)], bl
mov [edx+(nf57_32-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_33-nf57_11)], bl
mov [edx+(nf57_34-nf57_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_35-nf57_11)], bl
mov [edx+(nf57_36-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_37-nf57_11)], bl
mov [edx+(nf57_38-nf57_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_41-nf57_11)], bl
mov [edx+(nf57_42-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_43-nf57_11)], bl
mov [edx+(nf57_44-nf57_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_45-nf57_11)], bl
mov [edx+(nf57_46-nf57_11)], bh
shr ebx, 16
mov [edx+(nf57_47-nf57_11)], bl
mov [edx+(nf57_48-nf57_11)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf57_0 ; flush prefetch
ALIGN 4
nf57_0:
nf57_11:mov al, bl
nf57_12:mov ah, bl
shl eax, 16
nf57_13:mov al, bl
nf57_14:mov ah, bl
mov [edi], eax
nf57_15:mov al, bl
nf57_16:mov ah, bl
shl eax, 16
nf57_17:mov al, bl
nf57_18:mov ah, bl
mov [edi+4], eax
add edi, edx
nf57_21:mov al, bl
nf57_22:mov ah, bl
shl eax, 16
nf57_23:mov al, bl
nf57_24:mov ah, bl
mov [edi], eax
nf57_25:mov al, bl
nf57_26:mov ah, bl
shl eax, 16
nf57_27:mov al, bl
nf57_28:mov ah, bl
mov [edi+4], eax
add edi, edx
nf57_31:mov al, bl
nf57_32:mov ah, bl
shl eax, 16
nf57_33:mov al, bl
nf57_34:mov ah, bl
mov [edi], eax
nf57_35:mov al, bl
nf57_36:mov ah, bl
shl eax, 16
nf57_37:mov al, bl
nf57_38:mov ah, bl
mov [edi+4], eax
add edi, edx
nf57_41:mov al, bl
nf57_42:mov ah, bl
shl eax, 16
nf57_43:mov al, bl
nf57_44:mov ah, bl
mov [edi], eax
nf57_45:mov al, bl
nf57_46:mov ah, bl
shl eax, 16
nf57_47:mov al, bl
nf57_48:mov ah, bl
mov [edi+4], eax
add esi, 12
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
nf10: ; 2x2 4x4x2 (32 bytes)
mov ax, [esi]
cmp al, ah
ja nf26
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf10_11+1
mov al, [esi+4]
mov bl, al
xor bl, [esi+5]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf10_11-nf10_11)], bl
mov [edx+(nf10_12-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_13-nf10_11)], bl
mov [edx+(nf10_14-nf10_11)], bh
mov al, [esi+6]
mov bl, al
xor bl, [esi+7]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf10_21-nf10_11)], bl
mov [edx+(nf10_22-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_23-nf10_11)], bl
mov [edx+(nf10_24-nf10_11)], bh
mov al, [esi+12]
mov bl, al
xor bl, [esi+13]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf10_31-nf10_11)], bl
mov [edx+(nf10_32-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_33-nf10_11)], bl
mov [edx+(nf10_34-nf10_11)], bh
mov al, [esi+14]
mov bl, al
xor bl, [esi+15]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf10_41-nf10_11)], bl
mov [edx+(nf10_42-nf10_11)], bh
shr ebx, 16
mov [edx+(nf10_43-nf10_11)], bl
mov [edx+(nf10_44-nf10_11)], bh
lea edx, [edx+(nf10_51-nf10_11)]
mov al, [esi+20]
mov bl, al
xor bl, [esi+21]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf10_51-nf10_51)], bl
mov [edx+(nf10_52-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_53-nf10_51)], bl
mov [edx+(nf10_54-nf10_51)], bh
mov al, [esi+22]
mov bl, al
xor bl, [esi+23]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf10_61-nf10_51)], bl
mov [edx+(nf10_62-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_63-nf10_51)], bl
mov [edx+(nf10_64-nf10_51)], bh
mov al, [esi+28]
mov bl, al
xor bl, [esi+29]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf10_71-nf10_51)], bl
mov [edx+(nf10_72-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_73-nf10_51)], bl
mov [edx+(nf10_74-nf10_51)], bh
mov al, [esi+30]
mov bl, al
xor bl, [esi+31]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf10_81-nf10_51)], bl
mov [edx+(nf10_82-nf10_51)], bh
shr ebx, 16
mov [edx+(nf10_83-nf10_51)], bl
mov [edx+(nf10_84-nf10_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf10_0 ; flush prefetch
ALIGN 4
nf10_0:
nf10_11:mov al, bl
nf10_12:mov ah, bl
shl eax, 16
nf10_13:mov al, bl
nf10_14:mov ah, bl
mov [edi], eax
add edi, edx
nf10_21:mov al, bl
nf10_22:mov ah, bl
shl eax, 16
nf10_23:mov al, bl
nf10_24:mov ah, bl
mov [edi], eax
add edi, edx
; Load bl,bh,cl,ch with four colors
mov bx, [esi+8]
mov cx, [esi+10]
nf10_31:mov al, bl
nf10_32:mov ah, bl
shl eax, 16
nf10_33:mov al, bl
nf10_34:mov ah, bl
mov [edi], eax
add edi, edx
nf10_41:mov al, bl
nf10_42:mov ah, bl
shl eax, 16
nf10_43:mov al, bl
nf10_44:mov ah, bl
mov [edi], eax
add edi, edx
lea eax, [edx*4-4]
sub edi, eax
; Load bl,bh,cl,ch with four colors
mov bx, [esi+16]
mov cx, [esi+18]
nf10_51:mov al, bl
nf10_52:mov ah, bl
shl eax, 16
nf10_53:mov al, bl
nf10_54:mov ah, bl
mov [edi], eax
add edi, edx
nf10_61:mov al, bl
nf10_62:mov ah, bl
shl eax, 16
nf10_63:mov al, bl
nf10_64:mov ah, bl
mov [edi], eax
add edi, edx
; Load bl,bh,cl,ch with four colors
mov bx, [esi+24]
mov cx, [esi+26]
nf10_71:mov al, bl
nf10_72:mov ah, bl
shl eax, 16
nf10_73:mov al, bl
nf10_74:mov ah, bl
mov [edi], eax
add edi, edx
nf10_81:mov al, bl
nf10_82:mov ah, bl
shl eax, 16
nf10_83:mov al, bl
nf10_84:mov ah, bl
mov [edi], eax
add esi, 32
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+16
nf26: ; 2x1 4x8x2 (24 bytes)
mov ax, [esi+12]
cmp al, ah
ja nf42
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 24
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf26_11+1
mov al, [esi+4]
mov bl, al
xor bl, [esi+5]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf26_11-nf26_11)], bl
mov [edx+(nf26_12-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_13-nf26_11)], bl
mov [edx+(nf26_14-nf26_11)], bh
mov al, [esi+6]
mov bl, al
xor bl, [esi+7]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf26_21-nf26_11)], bl
mov [edx+(nf26_22-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_23-nf26_11)], bl
mov [edx+(nf26_24-nf26_11)], bh
mov al, [esi+8]
mov bl, al
xor bl, [esi+9]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf26_31-nf26_11)], bl
mov [edx+(nf26_32-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_33-nf26_11)], bl
mov [edx+(nf26_34-nf26_11)], bh
mov al, [esi+10]
mov bl, al
xor bl, [esi+11]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf26_41-nf26_11)], bl
mov [edx+(nf26_42-nf26_11)], bh
shr ebx, 16
mov [edx+(nf26_43-nf26_11)], bl
mov [edx+(nf26_44-nf26_11)], bh
lea edx, [edx+(nf26_51-nf26_11)]
mov al, [esi+16]
mov bl, al
xor bl, [esi+17]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf26_51-nf26_51)], bl
mov [edx+(nf26_52-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_53-nf26_51)], bl
mov [edx+(nf26_54-nf26_51)], bh
mov al, [esi+18]
mov bl, al
xor bl, [esi+19]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf26_61-nf26_51)], bl
mov [edx+(nf26_62-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_63-nf26_51)], bl
mov [edx+(nf26_64-nf26_51)], bh
mov al, [esi+20]
mov bl, al
xor bl, [esi+21]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf26_71-nf26_51)], bl
mov [edx+(nf26_72-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_73-nf26_51)], bl
mov [edx+(nf26_74-nf26_51)], bh
mov al, [esi+22]
mov bl, al
xor bl, [esi+23]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf26_81-nf26_51)], bl
mov [edx+(nf26_82-nf26_51)], bh
shr ebx, 16
mov [edx+(nf26_83-nf26_51)], bl
mov [edx+(nf26_84-nf26_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf26_0 ; flush prefetch
ALIGN 4
nf26_0:
nf26_11:mov al, bl
nf26_12:mov ah, bl
shl eax, 16
nf26_13:mov al, bl
nf26_14:mov ah, bl
mov [edi], eax
add edi, edx
nf26_21:mov al, bl
nf26_22:mov ah, bl
shl eax, 16
nf26_23:mov al, bl
nf26_24:mov ah, bl
mov [edi], eax
add edi, edx
nf26_31:mov al, bl
nf26_32:mov ah, bl
shl eax, 16
nf26_33:mov al, bl
nf26_34:mov ah, bl
mov [edi], eax
add edi, edx
nf26_41:mov al, bl
nf26_42:mov ah, bl
shl eax, 16
nf26_43:mov al, bl
nf26_44:mov ah, bl
mov [edi], eax
add edi, edx
lea eax, [edx*4-4]
sub edi, eax
; Load bl,bh,cl,ch with four colors
mov bx, [esi+12]
mov cx, [esi+14]
nf26_51:mov al, bl
nf26_52:mov ah, bl
shl eax, 16
nf26_53:mov al, bl
nf26_54:mov ah, bl
mov [edi], eax
add edi, edx
nf26_61:mov al, bl
nf26_62:mov ah, bl
shl eax, 16
nf26_63:mov al, bl
nf26_64:mov ah, bl
mov [edi], eax
add edi, edx
nf26_71:mov al, bl
nf26_72:mov ah, bl
shl eax, 16
nf26_73:mov al, bl
nf26_74:mov ah, bl
mov [edi], eax
add edi, edx
nf26_81:mov al, bl
nf26_82:mov ah, bl
shl eax, 16
nf26_83:mov al, bl
nf26_84:mov ah, bl
mov [edi], eax
add esi, 24
sub edi, 4
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+32
nf42: ; 1x2 8x4x2 (24 bytes)
if 0 ;debug
mov eax, 0
mov ebx, 0
add esi, 24
jmp nf_solid
endif
xor eax, eax
lea ecx, nfpk_mov4
lea edx, byte ptr ds:nf42_11+1
mov al, [esi+4]
mov bl, al
xor bl, [esi+6]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf42_11-nf42_11)], bl
mov [edx+(nf42_12-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_13-nf42_11)], bl
mov [edx+(nf42_14-nf42_11)], bh
mov al, [esi+5]
mov bl, al
xor bl, [esi+7]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf42_15-nf42_11)], bl
mov [edx+(nf42_16-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_17-nf42_11)], bl
mov [edx+(nf42_18-nf42_11)], bh
mov al, [esi+8]
mov bl, al
xor bl, [esi+10]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf42_31-nf42_11)], bl
mov [edx+(nf42_32-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_33-nf42_11)], bl
mov [edx+(nf42_34-nf42_11)], bh
mov al, [esi+9]
mov bl, al
xor bl, [esi+11]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf42_35-nf42_11)], bl
mov [edx+(nf42_36-nf42_11)], bh
shr ebx, 16
mov [edx+(nf42_37-nf42_11)], bl
mov [edx+(nf42_38-nf42_11)], bh
lea edx, [edx+(nf42_51-nf42_11)]
mov al, [esi+16]
mov bl, al
xor bl, [esi+18]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf42_51-nf42_51)], bl
mov [edx+(nf42_52-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_53-nf42_51)], bl
mov [edx+(nf42_54-nf42_51)], bh
mov al, [esi+17]
mov bl, al
xor bl, [esi+19]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf42_55-nf42_51)], bl
mov [edx+(nf42_56-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_57-nf42_51)], bl
mov [edx+(nf42_58-nf42_51)], bh
mov al, [esi+20]
mov bl, al
xor bl, [esi+22]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf42_71-nf42_51)], bl
mov [edx+(nf42_72-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_73-nf42_51)], bl
mov [edx+(nf42_74-nf42_51)], bh
mov al, [esi+21]
mov bl, al
xor bl, [esi+23]
and bl, 0ccH
xor al, bl
mov ebx, [ecx+eax*4]
mov [edx+(nf42_75-nf42_51)], bl
mov [edx+(nf42_76-nf42_51)], bh
shr ebx, 16
mov [edx+(nf42_77-nf42_51)], bl
mov [edx+(nf42_78-nf42_51)], bh
; Load bl,bh,cl,ch with four colors
mov bx, [esi]
mov cx, [esi+2]
mov edx, nf_width
jmp nf42_0 ; flush prefetch
ALIGN 4
nf42_0:
nf42_11:mov al, bl
nf42_12:mov ah, bl
shl eax, 16
nf42_13:mov al, bl
nf42_14:mov ah, bl
mov [edi], eax
nf42_15:mov al, bl
nf42_16:mov ah, bl
shl eax, 16
nf42_17:mov al, bl
nf42_18:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_31:mov al, bl
nf42_32:mov ah, bl
shl eax, 16
nf42_33:mov al, bl
nf42_34:mov ah, bl
mov [edi], eax
nf42_35:mov al, bl
nf42_36:mov ah, bl
shl eax, 16
nf42_37:mov al, bl
nf42_38:mov ah, bl
mov [edi+4], eax
add edi, edx
; Load bl,bh,cl,ch with four colors
mov bx, [esi+12]
mov cx, [esi+14]
nf42_51:mov al, bl
nf42_52:mov ah, bl
shl eax, 16
nf42_53:mov al, bl
nf42_54:mov ah, bl
mov [edi], eax
nf42_55:mov al, bl
nf42_56:mov ah, bl
shl eax, 16
nf42_57:mov al, bl
nf42_58:mov ah, bl
mov [edi+4], eax
add edi, edx
nf42_71:mov al, bl
nf42_72:mov ah, bl
shl eax, 16
nf42_73:mov al, bl
nf42_74:mov ah, bl
mov [edi], eax
nf42_75:mov al, bl
nf42_76:mov ah, bl
shl eax, 16
nf42_77:mov al, bl
nf42_78:mov ah, bl
mov [edi+4], eax
add esi, 24
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
nf11: ; 8x8x8 (64 bytes)
if 0 ;debug
add esi, 64
mov eax, 0fefefefeH
; mov ebx, eax
mov ebx, 0
jmp nf_solid
endif
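        ; Half vertical resolution: each output row merges a pair of source rows,
        ; taking the byte positions selected by the 0ff00ff00H mask from the
        ; second row of the pair and the rest from the first.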
mov edx, nf_width
mov ebx, 0ff00ff00H
mov eax, [esi] ;0
mov ecx, eax
xor ecx, [esi+8]
and ecx, ebx
xor eax, ecx
mov [edi], eax
mov eax, [esi+4]
mov ecx, eax
xor ecx, [esi+4+8]
and ecx, ebx
xor eax, ecx
mov [edi+4], eax
add edi, edx
mov eax, [esi+16] ;2
mov ecx, eax
xor ecx, [esi+16+8]
and ecx, ebx
xor eax, ecx
mov [edi], eax
mov eax, [esi+20]
mov ecx, eax
xor ecx, [esi+20+8]
and ecx, ebx
xor eax, ecx
mov [edi+4], eax
add edi, edx
mov eax, [esi+32] ;4
mov ecx, eax
xor ecx, [esi+32+8]
and ecx, ebx
xor eax, ecx
mov [edi], eax
mov eax, [esi+36]
mov ecx, eax
xor ecx, [esi+36+8]
and ecx, ebx
xor eax, ecx
mov [edi+4], eax
add edi, edx
mov eax, [esi+48] ;6
mov ecx, eax
xor ecx, [esi+48+8]
and ecx, ebx
xor eax, ecx
mov [edi], eax
mov eax, [esi+52]
mov ecx, eax
xor ecx, [esi+52+8]
and ecx, ebx
xor eax, ecx
mov [edi+4], eax
add esi, 64
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
nf12: ; low 4x4x8 (16 bytes)
mov edx, nf_width
mov eax, [esi]
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi], ebx
shr eax, 16
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi+4], ebx
add edi, edx
mov eax, [esi+4]
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi], ebx
shr eax, 16
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi+4], ebx
add edi, edx
mov eax, [esi+8]
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi], ebx
shr eax, 16
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi+4], ebx
add edi, edx
mov eax, [esi+12]
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi], ebx
shr eax, 16
mov bl, ah
mov bh, ah
shl ebx, 16
mov bl, al
mov bh, al
mov [edi+4], ebx
sub edi, nfpk_back_right
add esi, 16
retn
;----------------------------------------
ALIGN 4
nf13: ; 2x2 4x4x0 (4 bytes)
mov edx, nf_width
mov cl, [esi]
mov ch, cl
mov eax, ecx
shl eax, 16
mov ax, cx
mov cl, [esi+1]
mov ch, cl
mov ebx, ecx
shl ebx, 16
mov bx, cx
mov [edi], eax
mov [edi+4], ebx
mov [edi+edx], eax
mov [edi+edx+4], ebx
lea edi, [edi+edx*2]
mov cl, [esi+2]
mov ch, cl
mov eax, ecx
shl eax, 16
mov ax, cx
mov cl, [esi+3]
mov ch, cl
mov ebx, ecx
shl ebx, 16
mov bx, cx
mov [edi], eax
mov [edi+4], ebx
add edi, edx
mov [edi], eax
mov [edi+4], ebx
sub edi, nfpk_back_right
add esi, 4
retn
;----------------------------------------
ALIGN 4
nf14: ; 8x8x0 (1 byte)
mov bl, [esi] ; Copy color into 8 positions
inc esi
mov bh, bl
mov eax, ebx
shl eax, 16
mov ax, bx
mov ebx, eax
if 0 ;debug
mov eax, 080808080h
mov ebx, eax
endif
jmp nf_solid
retn
;----------------------------------------
ALIGN 4
nf15: ; mix 8x8x0 (2 bytes)
mov bx, [esi] ; Copy 2 colors into 8 positions
add esi, 2 ; in a checkerboard
mov ax, bx
shl eax, 16
mov ax, bx
mov ebx, eax
rol ebx, 8
if 0 ;debug
mov eax, 080808080h
mov ebx, eax
endif
nf_solid:
mov edx, nf_width
mov [edi], eax
mov [edi+4], eax
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
add edi, edx
mov [edi], eax
mov [edi+4], eax
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
nfPkDecompD ENDP
endif
;---
.data
; Constant tables
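; Each table below is generated by the nested FOR macros: every entry packs
; four pre-assembled mod/reg/rm bytes, one per bit field of the index byte
; (least-significant field first; 1-bit fields for the two-register table,
; 2-bit fields for the four-register tables).  The HiColor handlers look an
; entry up with a data byte and patch its four bytes into their unrolled mov
; instructions to pick the register each mov uses.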
nfhpk_mov4l LABEL DWORD
; low 4x1 in 8x1 (patch +1)
; mov eax, ebx/ecx
MOVH4L_REGS TEXTEQU <!<0c0h+3,0c0h+1!>>
%FOR m4, MOVH4L_REGS
% FOR m3, MOVH4L_REGS
% FOR m2, MOVH4L_REGS
% FOR m1, MOVH4L_REGS
BYTE m1,m2,m3,m4
ENDM
ENDM
ENDM
ENDM
nfhpk_mov8 LABEL DWORD
; 8x1 (each two bits select a pair of colors in a reg)
; low 4x2 in 8x2 (each two bits select a duplicated color in reg)
; (patch +1)
; mov ds:[edi+0/4/8/12], ebx/edx/ecx/ebp
; Note: Patched code specifies mov [ebp+0]... instead
        ; of mov [edi+0]... to ensure that 8-bit offsets are
        ; used by the assembler even for an offset of zero.
;
MOVH8_REGS TEXTEQU <!<3*8,2*8,1*8,5*8!>>
%FOR m4, MOVH8_REGS
% FOR m3, MOVH8_REGS
% FOR m2, MOVH8_REGS
% FOR m1, MOVH8_REGS
BYTE m1+047h,m2+047h,m3+047h,m4+047h
ENDM
ENDM
ENDM
ENDM
nfhpk_mov4 LABEL DWORD
; 4x2 (patch +2)
; mov ax, bx/dx/cx/bp
; low 4x2 in 8x2 (patch +1)
; mov eax, ebx/edx/ecx/ebp
MOVH4_REGS TEXTEQU <!<0c0h+3,0c0h+2,0c0h+1,0c0h+5!>>
%FOR m4, MOVH4_REGS
% FOR m3, MOVH4_REGS
% FOR m2, MOVH4_REGS
% FOR m1, MOVH4_REGS
BYTE m1,m2,m3,m4
ENDM
ENDM
ENDM
ENDM
.code
; Normal version (HiColor)
;
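; Trans16 translates one 16-bit source pixel at [idx] into the display pixel
; format: the active variant looks up the pixel's low and high bytes in the
; nf_trans16_lo/nf_trans16_hi word tables and ORs the results together.  The
; first two variants are disabled with "if 0"; the optional mask argument is
; ignored by the active variant (the disabled/fallback variants use it to
; clear bit 15).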
if TRANS16
if 0
Trans16 MACRO dst:req, idx:req, mask
mov dst, [idx]
ifnb <mask>
and dst, 07FFFh
endif
ENDM
elseif 0
Trans16 MACRO dst:req, idx:req, mask
mov dst, [idx]
mov ax, dst
and ax, 0FFE0h
add dst, ax
ENDM
else
EXTERN nf_trans16_lo: WORD
EXTERN nf_trans16_hi: WORD
Trans16 MACRO dst:req, idx:req, mask
xor eax, eax
mov al, [idx]
mov dst, nf_trans16_lo[eax*2]
xor eax, eax
mov al, [idx+1]
or dst, nf_trans16_hi[eax*2]
ENDM
endif
else
Trans16 MACRO dst:req, idx:req, mask
mov dst, [idx]
ifnb <mask>
and dst, 07FFFh
endif
ENDM
endif
nfHPkDecomp PROC USES ESI EDI EBX, \
ops:PTRBYTE, comp:PTRBYTE, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD
LOCAL tbuf: PTRBYTE
LOCAL new_row:DWORD
LOCAL DiffBufPtrs:DWORD
LOCAL nfpk_back_right: DWORD
LOCAL wcnt:DWORD
LOCAL bcomp:PTRBYTE
LOG_LABEL "StartPkDecomp"
.data
nfhpk_OpTbl label dword
dword offset nf0 ; Prev Same (0)
dword offset nf1 ; No change (and copied to screen) (0)
dword offset nf2 ; Near shift from older part of current buf (1)
dword offset nf3 ; Near shift from newer part of current buf (1)
dword offset nf4 ; Near shift from previous buffer (1)
dword offset nf5 ; Far shift from previous buffer (2)
dword offset nf6 ; Far shift from current buffer (2)
; [Or if COMPOPS, run of no changes (0)]
dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
        dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 8x4x2 (24 bytes)
dword offset nf11 ; 8x8x8 (64 bytes)
dword offset nf12 ; low 4x4x8 (16 bytes)
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
dword offset nf14 ; 8x8x0 (1 byte)
dword offset nf15 ; mix 8x8x0 (2 bytes)
.code
ifdef SYMANTEC
mov ebx, ds ; Allow DS to access code
mov ecx, 0
mov ax, 3505h
int 21h
endif
NF_DECOMP_INIT 1
mov eax, nf_back_right
sub eax, SWIDTH*2
mov nfpk_back_right, eax
mov esi, comp
mov edi, tbuf
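        ; The first word of the compressed stream is the offset of a secondary
        ; byte-parameter stream (bcomp); the near-shift opcodes nf2..nf4 fetch
        ; their shift bytes from it rather than from the main stream.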
xor eax, eax
mov ax, [esi]
add eax, esi
mov bcomp, eax
add esi, 2
nf_StartRow:
mov eax, w
shr eax, 1
mov wcnt,eax
ALIGN 4
nf_NextPair:
dec wcnt
js nf_NextRow
mov ebx, ops
mov al, [ebx]
inc ebx
mov ops, ebx
xor ebx, ebx
mov bl, al
shr bl, 4
and eax, 0Fh
push offset nf_NextPair
push nfhpk_OpTbl[ebx*4]
jmp nfhpk_OpTbl[eax*4]
nf_NextRow:
add edi, new_row
dec h
jnz nf_StartRow
LOG_LABEL "EndPkDecomp"
ifdef SYMANTEC
mov ebx, ds ; Disable DS from accessing code
mov ecx, offset DGROUP:_data_bottom[-1]
mov ax, 3505h
int 21h
endif
ret
;----------------------------------------
ALIGN 4
nf0: ; No change from previous buffer
mov eax, DiffBufPtrs
jmp nf_shift
;----------------------------------------
ALIGN 4
nf1: ; No change (and copied to screen)
if 0 ;debug
mov ebx, 0
jmp nf_solid
endif
add edi, SWIDTH*2
retn
;----------------------------------------
ALIGN 4
nf2: ; Near shift from older part of current buffer
xor eax, eax
mov ebx, bcomp
inc bcomp
mov al, [ebx]
mov ax, nfpk_ShiftP2[eax*2]
nf_xyc_shift:
xor ebx, ebx
mov bl, ah
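        ; Sign-extend the x offset from its low byte and double it (sar by 24-1):
        ; HiColor pixels are two bytes wide.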
shl eax, 24
sar eax, 24-1
add eax, nfpk_ShiftY[ebx*4]
jmp nf_shift
;----------------------------------------
ALIGN 4
nf3: ; Near shift from newer part of current buffer
xor eax, eax
mov ebx, bcomp
inc bcomp
mov al, [ebx]
mov ax, nfpk_ShiftP2[eax*2]
neg al
neg ah
jmp nf_xyc_shift
;----------------------------------------
ALIGN 4
nf4: ; Near shift from previous buffer
xor eax, eax
mov ebx, bcomp
inc bcomp
mov al, [ebx]
mov ax, nfpk_ShiftP1[eax*2]
jmp nf_xyp_shift
;----------------------------------------
ALIGN 4
nf5: ; Far shift from previous buffer
mov ax, [esi]
add esi, 2
nf_xyp_shift:
xor ebx, ebx
mov bl, ah
shl eax, 24
sar eax, 24-1
add eax, nfpk_ShiftY[ebx*4]
add eax, DiffBufPtrs
jmp nf_shift
;----------------------------------------
ALIGN 4
nf6: ; Far shift from current buffer
mov ax, [esi]
add esi, 2
jmp nf_xyc_shift
;----------------------------------------
ALIGN 4
nf_shift:
if 0 ;debug
mov ebx, 0
jmp nf_solid
endif
mov ebx, esi ; save esi
lea esi, [edi+eax]
mov edx, nf_width
REPEAT 7
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
mov eax, [esi+8]
mov [edi+8], eax
mov eax, [esi+12]
mov [edi+12], eax
add esi, edx
add edi, edx
ENDM
mov eax, [esi]
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
mov eax, [esi+8]
mov [edi+8], eax
mov eax, [esi+12]
mov [edi+12], eax
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
mov esi, ebx ; restore esi
retn
;----------------------------------------
ALIGN 4
nf7: ; 8x8x1 (12 bytes)
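        ; In HiColor, bit 15 of the first color word selects the alternate
        ; (low-resolution) form of this opcode; colors themselves are 15 bits.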
test word ptr [esi], 08000h
jnz nf23
if 0 ;debug
add esi, 12
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov8
lea edx, byte ptr ds:nf7_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_11-nf7_11)], bl
mov [edx+(nf7_12-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_13-nf7_11)], bl
mov [edx+(nf7_14-nf7_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_21-nf7_11)], bl
mov [edx+(nf7_22-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_23-nf7_11)], bl
mov [edx+(nf7_24-nf7_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_31-nf7_11)], bl
mov [edx+(nf7_32-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_33-nf7_11)], bl
mov [edx+(nf7_34-nf7_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_41-nf7_11)], bl
mov [edx+(nf7_42-nf7_11)], bh
shr ebx, 16
mov [edx+(nf7_43-nf7_11)], bl
mov [edx+(nf7_44-nf7_11)], bh
lea edx, [edx+(nf7_51-nf7_11)]
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_51-nf7_51)], bl
mov [edx+(nf7_52-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_53-nf7_51)], bl
mov [edx+(nf7_54-nf7_51)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_61-nf7_51)], bl
mov [edx+(nf7_62-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_63-nf7_51)], bl
mov [edx+(nf7_64-nf7_51)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_71-nf7_51)], bl
mov [edx+(nf7_72-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_73-nf7_51)], bl
mov [edx+(nf7_74-nf7_51)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf7_81-nf7_51)], bl
mov [edx+(nf7_82-nf7_51)], bh
shr ebx, 16
mov [edx+(nf7_83-nf7_51)], bl
mov [edx+(nf7_84-nf7_51)], bh
push ebp
push esi
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
if TRANS16
Trans16 cx, esi+2
shl ecx, 16
Trans16 cx, esi
else
mov ecx, [esi]
endif
mov esi,nf_width
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
jmp nf7_0 ; flush prefetch
ALIGN 4
nf7_0:
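        ; After patching, each store below is really "mov [edi+0/4/8/12], <reg>":
        ; the displacement byte survives the patch, and the mod/reg/rm byte now
        ; selects edi as the base and one of ebx/edx/ecx/ebp (a premixed pixel
        ; pair) as the source, painting eight 16-bit pixels per row.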
nf7_11: mov [ebp+0], ebx
nf7_12: mov [ebp+4], ebx
nf7_13: mov [ebp+8], ebx
nf7_14: mov [ebp+12], ebx
add edi, esi
nf7_21: mov [ebp+0], ebx
nf7_22: mov [ebp+4], ebx
nf7_23: mov [ebp+8], ebx
nf7_24: mov [ebp+12], ebx
add edi, esi
nf7_31: mov [ebp+0], ebx
nf7_32: mov [ebp+4], ebx
nf7_33: mov [ebp+8], ebx
nf7_34: mov [ebp+12], ebx
add edi, esi
nf7_41: mov [ebp+0], ebx
nf7_42: mov [ebp+4], ebx
nf7_43: mov [ebp+8], ebx
nf7_44: mov [ebp+12], ebx
add edi, esi
nf7_51: mov [ebp+0], ebx
nf7_52: mov [ebp+4], ebx
nf7_53: mov [ebp+8], ebx
nf7_54: mov [ebp+12], ebx
add edi, esi
nf7_61: mov [ebp+0], ebx
nf7_62: mov [ebp+4], ebx
nf7_63: mov [ebp+8], ebx
nf7_64: mov [ebp+12], ebx
add edi, esi
nf7_71: mov [ebp+0], ebx
nf7_72: mov [ebp+4], ebx
nf7_73: mov [ebp+8], ebx
nf7_74: mov [ebp+12], ebx
add edi, esi
nf7_81: mov [ebp+0], ebx
nf7_82: mov [ebp+4], ebx
nf7_83: mov [ebp+8], ebx
nf7_84: mov [ebp+12], ebx
pop esi
pop ebp
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf7+16
nf23: ; low 4x4x1 (6 bytes)
if 0 ;debug
add esi, 6
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov4l
lea edx, byte ptr ds:nf23_11+1
mov al, [esi+4]
and al, 0fH
mov ebx, [ecx+eax*4]
mov [edx+(nf23_11-nf23_11)], bl
mov [edx+(nf23_12-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_13-nf23_11)], bl
mov [edx+(nf23_14-nf23_11)], bh
mov al, [esi+4]
shr al, 4
mov ebx, [ecx+eax*4]
mov [edx+(nf23_31-nf23_11)], bl
mov [edx+(nf23_32-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_33-nf23_11)], bl
mov [edx+(nf23_34-nf23_11)], bh
mov al, [esi+5]
and al, 0fH
mov ebx, [ecx+eax*4]
mov [edx+(nf23_51-nf23_11)], bl
mov [edx+(nf23_52-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_53-nf23_11)], bl
mov [edx+(nf23_54-nf23_11)], bh
mov al, [esi+5]
shr al, 4
mov ebx, [ecx+eax*4]
mov [edx+(nf23_71-nf23_11)], bl
mov [edx+(nf23_72-nf23_11)], bh
shr ebx, 16
mov [edx+(nf23_73-nf23_11)], bl
mov [edx+(nf23_74-nf23_11)], bh
mov edx, nf_width
; load ebx,ecx with 00,11 color combinations
if TRANS16
Trans16 cx, esi, 1
shrd ebx, ecx, 16
mov bx, cx
Trans16 cx, esi+2
shrd eax, ecx, 16
mov ax, cx
mov ecx, eax
else
mov ebx, [esi]
and ebx, 07FFF7FFFh
mov ecx, ebx
ror ebx, 16
xchg bx,cx
endif
jmp nf23_0 ; flush prefetch
ALIGN 4
nf23_0:
nf23_11:mov eax, ebx
mov [edi], eax
mov [edi+edx], eax
nf23_12:mov eax, ebx
mov [edi+4], eax
mov [edi+edx+4], eax
nf23_13:mov eax, ebx
mov [edi+8], eax
mov [edi+edx+8], eax
nf23_14:mov eax, ebx
mov [edi+12], eax
mov [edi+edx+12], eax
lea edi, [edi+edx*2]
nf23_31:mov eax, ebx
mov [edi], eax
mov [edi+edx], eax
nf23_32:mov eax, ebx
mov [edi+4], eax
mov [edi+edx+4], eax
nf23_33:mov eax, ebx
mov [edi+8], eax
mov [edi+edx+8], eax
nf23_34:mov eax, ebx
mov [edi+12], eax
mov [edi+edx+12], eax
lea edi, [edi+edx*2]
nf23_51:mov eax, ebx
mov [edi], eax
mov [edi+edx], eax
nf23_52:mov eax, ebx
mov [edi+4], eax
mov [edi+edx+4], eax
nf23_53:mov eax, ebx
mov [edi+8], eax
mov [edi+edx+8], eax
nf23_54:mov eax, ebx
mov [edi+12], eax
mov [edi+edx+12], eax
lea edi, [edi+edx*2]
nf23_71:mov eax, ebx
mov [edi], eax
mov [edi+edx], eax
nf23_72:mov eax, ebx
mov [edi+4], eax
mov [edi+edx+4], eax
nf23_73:mov eax, ebx
mov [edi+8], eax
mov [edi+edx+8], eax
nf23_74:mov eax, ebx
mov [edi+12], eax
mov [edi+edx+12], eax
add edi, edx
sub edi, nfpk_back_right
add esi, 6
retn
;----------------------------------------
ALIGN 4
nf8: ; 2x2 4x4x1 (24 bytes)
test word ptr [esi], 08000h
jnz nf24
if 0 ;debug
add esi, 24
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov8
lea edx, byte ptr ds:nf8_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_11-nf8_11)], bl
mov [edx+(nf8_12-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_13-nf8_11)], bl
mov [edx+(nf8_14-nf8_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_21-nf8_11)], bl
mov [edx+(nf8_22-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_23-nf8_11)], bl
mov [edx+(nf8_24-nf8_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_31-nf8_11)], bl
mov [edx+(nf8_32-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_33-nf8_11)], bl
mov [edx+(nf8_34-nf8_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_41-nf8_11)], bl
mov [edx+(nf8_42-nf8_11)], bh
shr ebx, 16
mov [edx+(nf8_43-nf8_11)], bl
mov [edx+(nf8_44-nf8_11)], bh
add edx, nf8_51-nf8_11
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_51-nf8_51)], bl
mov [edx+(nf8_52-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_53-nf8_51)], bl
mov [edx+(nf8_54-nf8_51)], bh
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_61-nf8_51)], bl
mov [edx+(nf8_62-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_63-nf8_51)], bl
mov [edx+(nf8_64-nf8_51)], bh
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_71-nf8_51)], bl
mov [edx+(nf8_72-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_73-nf8_51)], bl
mov [edx+(nf8_74-nf8_51)], bh
mov al, [esi+23]
mov ebx, [ecx+eax*4]
mov [edx+(nf8_81-nf8_51)], bl
mov [edx+(nf8_82-nf8_51)], bh
shr ebx, 16
mov [edx+(nf8_83-nf8_51)], bl
mov [edx+(nf8_84-nf8_51)], bh
push ebp
push esi
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
if TRANS16
Trans16 cx, esi+18+2
shl ecx, 16
Trans16 cx, esi+18
push ecx
Trans16 cx, esi+12+2
shl ecx, 16
Trans16 cx, esi+12
push ecx
Trans16 cx, esi+6+2
shl ecx, 16
Trans16 cx, esi+6
push ecx
Trans16 cx, esi+2
shl ecx, 16
Trans16 cx, esi
else
mov ecx, [esi]
endif
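        ; In the TRANS16 path the color pairs for the remaining quadrants were
        ; pushed above in reverse order and are popped between quadrants below;
        ; the non-TRANS16 path re-reads them through the saved esi at [esp].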
mov esi,nf_width
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
jmp nf8_0 ; flush prefetch
ALIGN 4
nf8_0:
nf8_11: mov [ebp+0], ebx
nf8_12: mov [ebp+4], ebx
add edi, esi
nf8_13: mov [ebp+0], ebx
nf8_14: mov [ebp+4], ebx
add edi, esi
nf8_21: mov [ebp+0], ebx
nf8_22: mov [ebp+4], ebx
add edi, esi
nf8_23: mov [ebp+0], ebx
nf8_24: mov [ebp+4], ebx
add edi, esi
if TRANS16
pop ecx
else
mov eax, [esp]
mov ecx, [eax+6]
endif
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
nf8_31: mov [ebp+0], ebx
nf8_32: mov [ebp+4], ebx
add edi, esi
nf8_33: mov [ebp+0], ebx
nf8_34: mov [ebp+4], ebx
add edi, esi
nf8_41: mov [ebp+0], ebx
nf8_42: mov [ebp+4], ebx
add edi, esi
nf8_43: mov [ebp+0], ebx
nf8_44: mov [ebp+4], ebx
add edi, esi
lea eax, [esi*8-8]
sub edi, eax
if TRANS16
pop ecx
else
mov eax, [esp]
mov ecx, [eax+12]
endif
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
nf8_51: mov [ebp+0], ebx
nf8_52: mov [ebp+4], ebx
add edi, esi
nf8_53: mov [ebp+0], ebx
nf8_54: mov [ebp+4], ebx
add edi, esi
nf8_61: mov [ebp+0], ebx
nf8_62: mov [ebp+4], ebx
add edi, esi
nf8_63: mov [ebp+0], ebx
nf8_64: mov [ebp+4], ebx
add edi, esi
if TRANS16
pop ecx
else
mov eax, [esp]
mov ecx, [eax+18]
endif
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
nf8_71: mov [ebp+0], ebx
nf8_72: mov [ebp+4], ebx
add edi, esi
nf8_73: mov [ebp+0], ebx
nf8_74: mov [ebp+4], ebx
add edi, esi
nf8_81: mov [ebp+0], ebx
nf8_82: mov [ebp+4], ebx
add edi, esi
nf8_83: mov [ebp+0], ebx
nf8_84: mov [ebp+4], ebx
pop esi
pop ebp
add esi, 24
sub edi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+16
nf24: ; 2x1 4x8x1 (16 bytes)
test word ptr [esi+8], 08000h
jnz nf40
if 0 ;debug
add esi, 16
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov8
lea edx, byte ptr ds:nf24_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_11-nf24_11)], bl
mov [edx+(nf24_12-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_13-nf24_11)], bl
mov [edx+(nf24_14-nf24_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_21-nf24_11)], bl
mov [edx+(nf24_22-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_23-nf24_11)], bl
mov [edx+(nf24_24-nf24_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_31-nf24_11)], bl
mov [edx+(nf24_32-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_33-nf24_11)], bl
mov [edx+(nf24_34-nf24_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_41-nf24_11)], bl
mov [edx+(nf24_42-nf24_11)], bh
shr ebx, 16
mov [edx+(nf24_43-nf24_11)], bl
mov [edx+(nf24_44-nf24_11)], bh
add edx, nf24_51-nf24_11
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_51-nf24_51)], bl
mov [edx+(nf24_52-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_53-nf24_51)], bl
mov [edx+(nf24_54-nf24_51)], bh
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_61-nf24_51)], bl
mov [edx+(nf24_62-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_63-nf24_51)], bl
mov [edx+(nf24_64-nf24_51)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_71-nf24_51)], bl
mov [edx+(nf24_72-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_73-nf24_51)], bl
mov [edx+(nf24_74-nf24_51)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf24_81-nf24_51)], bl
mov [edx+(nf24_82-nf24_51)], bh
shr ebx, 16
mov [edx+(nf24_83-nf24_51)], bl
mov [edx+(nf24_84-nf24_51)], bh
push ebp
push esi
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
if TRANS16
Trans16 cx, esi+8+2
shl ecx, 16
Trans16 cx, esi+8
push ecx
Trans16 cx, esi+2
shl ecx, 16
Trans16 cx, esi, 1
else
mov ecx, [esi]
and ecx, 07FFF7FFFh
endif
mov esi,nf_width
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
jmp nf24_0 ; flush prefetch
ALIGN 4
nf24_0:
nf24_11:mov [ebp+0], ebx
nf24_12:mov [ebp+4], ebx
add edi, esi
nf24_13:mov [ebp+0], ebx
nf24_14:mov [ebp+4], ebx
add edi, esi
nf24_21:mov [ebp+0], ebx
nf24_22:mov [ebp+4], ebx
add edi, esi
nf24_23:mov [ebp+0], ebx
nf24_24:mov [ebp+4], ebx
add edi, esi
nf24_31:mov [ebp+0], ebx
nf24_32:mov [ebp+4], ebx
add edi, esi
nf24_33:mov [ebp+0], ebx
nf24_34:mov [ebp+4], ebx
add edi, esi
nf24_41:mov [ebp+0], ebx
nf24_42:mov [ebp+4], ebx
add edi, esi
nf24_43:mov [ebp+0], ebx
nf24_44:mov [ebp+4], ebx
add edi, esi
lea eax, [esi*8-8]
sub edi, eax
if TRANS16
pop ecx
else
mov eax, [esp]
mov ecx, [eax+8]
endif
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
nf24_51:mov [ebp+0], ebx
nf24_52:mov [ebp+4], ebx
add edi, esi
nf24_53:mov [ebp+0], ebx
nf24_54:mov [ebp+4], ebx
add edi, esi
nf24_61:mov [ebp+0], ebx
nf24_62:mov [ebp+4], ebx
add edi, esi
nf24_63:mov [ebp+0], ebx
nf24_64:mov [ebp+4], ebx
add edi, esi
nf24_71:mov [ebp+0], ebx
nf24_72:mov [ebp+4], ebx
add edi, esi
nf24_73:mov [ebp+0], ebx
nf24_74:mov [ebp+4], ebx
add edi, esi
nf24_81:mov [ebp+0], ebx
nf24_82:mov [ebp+4], ebx
add edi, esi
nf24_83:mov [ebp+0], ebx
nf24_84:mov [ebp+4], ebx
pop esi
pop ebp
add esi, 16
sub edi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf8+32
nf40: ; 1x2 8x4x1 (16 bytes)
if 0 ;debug
add esi, 16
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov8
lea edx, byte ptr ds:nf40_11+1
mov al, [esi+4]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_11-nf40_11)], bl
mov [edx+(nf40_12-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_13-nf40_11)], bl
mov [edx+(nf40_14-nf40_11)], bh
mov al, [esi+5]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_21-nf40_11)], bl
mov [edx+(nf40_22-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_23-nf40_11)], bl
mov [edx+(nf40_24-nf40_11)], bh
mov al, [esi+6]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_31-nf40_11)], bl
mov [edx+(nf40_32-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_33-nf40_11)], bl
mov [edx+(nf40_34-nf40_11)], bh
mov al, [esi+7]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_41-nf40_11)], bl
mov [edx+(nf40_42-nf40_11)], bh
shr ebx, 16
mov [edx+(nf40_43-nf40_11)], bl
mov [edx+(nf40_44-nf40_11)], bh
add edx, nf40_51-nf40_11
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_51-nf40_51)], bl
mov [edx+(nf40_52-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_53-nf40_51)], bl
mov [edx+(nf40_54-nf40_51)], bh
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_61-nf40_51)], bl
mov [edx+(nf40_62-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_63-nf40_51)], bl
mov [edx+(nf40_64-nf40_51)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_71-nf40_51)], bl
mov [edx+(nf40_72-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_73-nf40_51)], bl
mov [edx+(nf40_74-nf40_51)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf40_81-nf40_51)], bl
mov [edx+(nf40_82-nf40_51)], bh
shr ebx, 16
mov [edx+(nf40_83-nf40_51)], bl
mov [edx+(nf40_84-nf40_51)], bh
push ebp
push esi
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
; (note that bits are read least significant first).
if TRANS16
Trans16 cx, esi+8+2
shl ecx, 16
Trans16 cx, esi+8, 1
push ecx
Trans16 cx, esi+2
shl ecx, 16
Trans16 cx, esi, 1
else
mov ecx, [esi]
and ecx, 07FFF7FFFh
endif
mov esi,nf_width
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
jmp nf40_0 ; flush prefetch
ALIGN 4
nf40_0:
nf40_11:mov [ebp+0], ebx
nf40_12:mov [ebp+4], ebx
nf40_13:mov [ebp+8], ebx
nf40_14:mov [ebp+12], ebx
add edi, esi
nf40_21:mov [ebp+0], ebx
nf40_22:mov [ebp+4], ebx
nf40_23:mov [ebp+8], ebx
nf40_24:mov [ebp+12], ebx
add edi, esi
nf40_31:mov [ebp+0], ebx
nf40_32:mov [ebp+4], ebx
nf40_33:mov [ebp+8], ebx
nf40_34:mov [ebp+12], ebx
add edi, esi
nf40_41:mov [ebp+0], ebx
nf40_42:mov [ebp+4], ebx
nf40_43:mov [ebp+8], ebx
nf40_44:mov [ebp+12], ebx
add edi, esi
if TRANS16
pop ecx
else
mov eax, [esp]
mov ecx, [eax+8]
and ecx, 07FFF7FFFh
endif
mov edx, ecx
ror edx, 16
mov ebx, edx
mov bx, cx
mov ebp, ecx
mov bp, dx
nf40_51:mov [ebp+0], ebx
nf40_52:mov [ebp+4], ebx
nf40_53:mov [ebp+8], ebx
nf40_54:mov [ebp+12], ebx
add edi, esi
nf40_61:mov [ebp+0], ebx
nf40_62:mov [ebp+4], ebx
nf40_63:mov [ebp+8], ebx
nf40_64:mov [ebp+12], ebx
add edi, esi
nf40_71:mov [ebp+0], ebx
nf40_72:mov [ebp+4], ebx
nf40_73:mov [ebp+8], ebx
nf40_74:mov [ebp+12], ebx
add edi, esi
nf40_81:mov [ebp+0], ebx
nf40_82:mov [ebp+4], ebx
nf40_83:mov [ebp+8], ebx
nf40_84:mov [ebp+12], ebx
pop esi
pop ebp
add esi, 16
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
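; The 2-bit pattern handlers that follow (nf9/nf25/nf41/nf57, and the nf10
; family further down) use the same self-patching scheme with nfhpk_mov4 (or
; nfhpk_mov8 for the dword stores in nf41): the patched byte selects which of
; the four color registers bx/dx/cx/bp (or their pixel-doubled 32-bit forms)
; each 2-bit pixel is taken from.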
ALIGN 4
nf9: ; 8x8x2 (24 bytes)
test word ptr [esi], 08000h
jnz nf41
test word ptr [esi+4], 08000h
jnz nf25
if 0 ;debug
add esi, 24
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov4
lea edx, byte ptr ds:nf9_11+2
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_11-nf9_11)], bh
mov [edx+(nf9_12-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_13-nf9_11)], bh
mov [edx+(nf9_14-nf9_11)], bl
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_15-nf9_11)], bh
mov [edx+(nf9_16-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_17-nf9_11)], bh
mov [edx+(nf9_18-nf9_11)], bl
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_21-nf9_11)], bh
mov [edx+(nf9_22-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_23-nf9_11)], bh
mov [edx+(nf9_24-nf9_11)], bl
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_25-nf9_11)], bh
mov [edx+(nf9_26-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_27-nf9_11)], bh
mov [edx+(nf9_28-nf9_11)], bl
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_31-nf9_11)], bh
mov [edx+(nf9_32-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_33-nf9_11)], bh
mov [edx+(nf9_34-nf9_11)], bl
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_35-nf9_11)], bh
mov [edx+(nf9_36-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_37-nf9_11)], bh
mov [edx+(nf9_38-nf9_11)], bl
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_41-nf9_11)], bh
mov [edx+(nf9_42-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_43-nf9_11)], bh
mov [edx+(nf9_44-nf9_11)], bl
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_45-nf9_11)], bh
mov [edx+(nf9_46-nf9_11)], bl
shr ebx, 16
mov [edx+(nf9_47-nf9_11)], bh
mov [edx+(nf9_48-nf9_11)], bl
lea edx, [edx+(nf9_51-nf9_11)]
mov al, [esi+16]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_51-nf9_51)], bh
mov [edx+(nf9_52-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_53-nf9_51)], bh
mov [edx+(nf9_54-nf9_51)], bl
mov al, [esi+17]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_55-nf9_51)], bh
mov [edx+(nf9_56-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_57-nf9_51)], bh
mov [edx+(nf9_58-nf9_51)], bl
mov al, [esi+18]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_61-nf9_51)], bh
mov [edx+(nf9_62-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_63-nf9_51)], bh
mov [edx+(nf9_64-nf9_51)], bl
mov al, [esi+19]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_65-nf9_51)], bh
mov [edx+(nf9_66-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_67-nf9_51)], bh
mov [edx+(nf9_68-nf9_51)], bl
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_71-nf9_51)], bh
mov [edx+(nf9_72-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_73-nf9_51)], bh
mov [edx+(nf9_74-nf9_51)], bl
mov al, [esi+21]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_75-nf9_51)], bh
mov [edx+(nf9_76-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_77-nf9_51)], bh
mov [edx+(nf9_78-nf9_51)], bl
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_81-nf9_51)], bh
mov [edx+(nf9_82-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_83-nf9_51)], bh
mov [edx+(nf9_84-nf9_51)], bl
mov al, [esi+23]
mov ebx, [ecx+eax*4]
mov [edx+(nf9_85-nf9_51)], bh
mov [edx+(nf9_86-nf9_51)], bl
shr ebx, 16
mov [edx+(nf9_87-nf9_51)], bh
mov [edx+(nf9_88-nf9_51)], bl
push ebp
push esi
; Load bx,dx,cx,bp with four colors
if TRANS16
Trans16 bx, esi
Trans16 dx, esi+2
Trans16 cx, esi+4
Trans16 bp, esi+6
else
mov bx, [esi]
mov dx, [esi+2]
mov cx, [esi+4]
mov bp, [esi+6]
endif
mov esi, nf_width
jmp nf9_0 ; flush prefetch
ALIGN 4
nf9_0:
nf9_11: mov ax, bx
shl eax, 16
nf9_12: mov ax, bx
mov [edi], eax
nf9_13: mov ax, bx
shl eax, 16
nf9_14: mov ax, bx
mov [edi+4], eax
nf9_15: mov ax, bx
shl eax, 16
nf9_16: mov ax, bx
mov [edi+8], eax
nf9_17: mov ax, bx
shl eax, 16
nf9_18: mov ax, bx
mov [edi+12], eax
add edi, esi
nf9_21: mov ax, bx
shl eax, 16
nf9_22: mov ax, bx
mov [edi], eax
nf9_23: mov ax, bx
shl eax, 16
nf9_24: mov ax, bx
mov [edi+4], eax
nf9_25: mov ax, bx
shl eax, 16
nf9_26: mov ax, bx
mov [edi+8], eax
nf9_27: mov ax, bx
shl eax, 16
nf9_28: mov ax, bx
mov [edi+12], eax
add edi, esi
nf9_31: mov ax, bx
shl eax, 16
nf9_32: mov ax, bx
mov [edi], eax
nf9_33: mov ax, bx
shl eax, 16
nf9_34: mov ax, bx
mov [edi+4], eax
nf9_35: mov ax, bx
shl eax, 16
nf9_36: mov ax, bx
mov [edi+8], eax
nf9_37: mov ax, bx
shl eax, 16
nf9_38: mov ax, bx
mov [edi+12], eax
add edi, esi
nf9_41: mov ax, bx
shl eax, 16
nf9_42: mov ax, bx
mov [edi], eax
nf9_43: mov ax, bx
shl eax, 16
nf9_44: mov ax, bx
mov [edi+4], eax
nf9_45: mov ax, bx
shl eax, 16
nf9_46: mov ax, bx
mov [edi+8], eax
nf9_47: mov ax, bx
shl eax, 16
nf9_48: mov ax, bx
mov [edi+12], eax
add edi, esi
nf9_51: mov ax, bx
shl eax, 16
nf9_52: mov ax, bx
mov [edi], eax
nf9_53: mov ax, bx
shl eax, 16
nf9_54: mov ax, bx
mov [edi+4], eax
nf9_55: mov ax, bx
shl eax, 16
nf9_56: mov ax, bx
mov [edi+8], eax
nf9_57: mov ax, bx
shl eax, 16
nf9_58: mov ax, bx
mov [edi+12], eax
add edi, esi
nf9_61: mov ax, bx
shl eax, 16
nf9_62: mov ax, bx
mov [edi], eax
nf9_63: mov ax, bx
shl eax, 16
nf9_64: mov ax, bx
mov [edi+4], eax
nf9_65: mov ax, bx
shl eax, 16
nf9_66: mov ax, bx
mov [edi+8], eax
nf9_67: mov ax, bx
shl eax, 16
nf9_68: mov ax, bx
mov [edi+12], eax
add edi, esi
nf9_71: mov ax, bx
shl eax, 16
nf9_72: mov ax, bx
mov [edi], eax
nf9_73: mov ax, bx
shl eax, 16
nf9_74: mov ax, bx
mov [edi+4], eax
nf9_75: mov ax, bx
shl eax, 16
nf9_76: mov ax, bx
mov [edi+8], eax
nf9_77: mov ax, bx
shl eax, 16
nf9_78: mov ax, bx
mov [edi+12], eax
add edi, esi
nf9_81: mov ax, bx
shl eax, 16
nf9_82: mov ax, bx
mov [edi], eax
nf9_83: mov ax, bx
shl eax, 16
nf9_84: mov ax, bx
mov [edi+4], eax
nf9_85: mov ax, bx
shl eax, 16
nf9_86: mov ax, bx
mov [edi+8], eax
nf9_87: mov ax, bx
shl eax, 16
nf9_88: mov ax, bx
mov [edi+12], eax
pop esi
pop ebp
add esi, 24
sub edi, nfpk_back_right
retn
;----------------------------------------
ALIGN 4
;nf9+16
nf25: ; low 4x4x2 (12 bytes)
if 0 ;debug
add esi, 12
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov4
lea edx, byte ptr ds:nf25_11+1
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_11-nf25_11)], bl
mov [edx+(nf25_12-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_13-nf25_11)], bl
mov [edx+(nf25_14-nf25_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_21-nf25_11)], bl
mov [edx+(nf25_22-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_23-nf25_11)], bl
mov [edx+(nf25_24-nf25_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_31-nf25_11)], bl
mov [edx+(nf25_32-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_33-nf25_11)], bl
mov [edx+(nf25_34-nf25_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf25_41-nf25_11)], bl
mov [edx+(nf25_42-nf25_11)], bh
shr ebx, 16
mov [edx+(nf25_43-nf25_11)], bl
mov [edx+(nf25_44-nf25_11)], bh
push ebp
push esi
; Load ebx,edx,ecx,ebp with four colors, duplicated in high order.
if TRANS16
Trans16 cx, esi
shrd ebx, ecx, 16
mov bx, cx
Trans16 cx, esi+2
shrd edx, ecx, 16
mov dx, cx
Trans16 cx, esi+4, 1
shrd eax, ecx, 16
mov ax, cx
push eax
Trans16 cx, esi+6
shrd ebp, ecx, 16
mov bp, cx
pop ecx
else
mov ax, [esi]
shrd ebx, eax, 16
mov bx, ax
mov ax, [esi+2]
shrd edx, eax, 16
mov dx, ax
mov ax, [esi+4]
and eax, 07fffh
shrd ecx, eax, 16
mov cx, ax
mov ax, [esi+6]
shrd ebp, eax, 16
mov bp, ax
endif
mov esi, nf_width
jmp nf25_0 ; flush prefetch
ALIGN 4
nf25_0:
nf25_11:mov eax, ebx
mov [edi], eax
mov [edi+esi], eax
nf25_12:mov eax, ebx
mov [edi+4], eax
mov [edi+esi+4], eax
nf25_13:mov eax, ebx
mov [edi+8], eax
mov [edi+esi+8], eax
nf25_14:mov eax, ebx
mov [edi+12], eax
mov [edi+esi+12], eax
lea edi, [edi+esi*2]
nf25_21:mov eax, ebx
mov [edi], eax
mov [edi+esi], eax
nf25_22:mov eax, ebx
mov [edi+4], eax
mov [edi+esi+4], eax
nf25_23:mov eax, ebx
mov [edi+8], eax
mov [edi+esi+8], eax
nf25_24:mov eax, ebx
mov [edi+12], eax
mov [edi+esi+12], eax
lea edi, [edi+esi*2]
nf25_31:mov eax, ebx
mov [edi], eax
mov [edi+esi], eax
nf25_32:mov eax, ebx
mov [edi+4], eax
mov [edi+esi+4], eax
nf25_33:mov eax, ebx
mov [edi+8], eax
mov [edi+esi+8], eax
nf25_34:mov eax, ebx
mov [edi+12], eax
mov [edi+esi+12], eax
lea edi, [edi+esi*2]
nf25_41:mov eax, ebx
mov [edi], eax
mov [edi+esi], eax
nf25_42:mov eax, ebx
mov [edi+4], eax
mov [edi+esi+4], eax
nf25_43:mov eax, ebx
mov [edi+8], eax
mov [edi+esi+8], eax
nf25_44:mov eax, ebx
mov [edi+12], eax
mov [edi+esi+12], eax
add edi, esi
pop esi
pop ebp
add esi, 12
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+32
nf41: ; low 4x8x2 (16 bytes)
test word ptr [esi+4], 08000h
jnz nf57
if 0 ;debug
add esi, 16
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov8
lea edx, byte ptr ds:nf41_11+1
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_11-nf41_11)], bl
mov [edx+(nf41_12-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_13-nf41_11)], bl
mov [edx+(nf41_14-nf41_11)], bh
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_21-nf41_11)], bl
mov [edx+(nf41_22-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_23-nf41_11)], bl
mov [edx+(nf41_24-nf41_11)], bh
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_31-nf41_11)], bl
mov [edx+(nf41_32-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_33-nf41_11)], bl
mov [edx+(nf41_34-nf41_11)], bh
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_41-nf41_11)], bl
mov [edx+(nf41_42-nf41_11)], bh
shr ebx, 16
mov [edx+(nf41_43-nf41_11)], bl
mov [edx+(nf41_44-nf41_11)], bh
lea edx, [edx+(nf41_51-nf41_11)]
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_51-nf41_51)], bl
mov [edx+(nf41_52-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_53-nf41_51)], bl
mov [edx+(nf41_54-nf41_51)], bh
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_61-nf41_51)], bl
mov [edx+(nf41_62-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_63-nf41_51)], bl
mov [edx+(nf41_64-nf41_51)], bh
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_71-nf41_51)], bl
mov [edx+(nf41_72-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_73-nf41_51)], bl
mov [edx+(nf41_74-nf41_51)], bh
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf41_81-nf41_51)], bl
mov [edx+(nf41_82-nf41_51)], bh
shr ebx, 16
mov [edx+(nf41_83-nf41_51)], bl
mov [edx+(nf41_84-nf41_51)], bh
push ebp
push esi
; Load ebx,edx,ecx,ebp with four colors, duplicated in high order.
if TRANS16
Trans16 cx, esi, 1
shrd ebx, ecx, 16
mov bx, cx
Trans16 cx, esi+2
shrd edx, ecx, 16
mov dx, cx
Trans16 cx, esi+4
shrd eax, ecx, 16
mov ax, cx
push eax
Trans16 cx, esi+6
shrd ebp, ecx, 16
mov bp, cx
pop ecx
else
mov ax, [esi]
and eax, 07fffh
shrd ebx, eax, 16
mov bx, ax
mov ax, [esi+2]
shrd edx, eax, 16
mov dx, ax
mov ax, [esi+4]
shrd ecx, eax, 16
mov cx, ax
mov ax, [esi+6]
shrd ebp, eax, 16
mov bp, ax
endif
mov esi, nf_width
jmp nf41_0 ; flush prefetch
ALIGN 4
nf41_0:
nf41_11:mov [ebp+0], ebx
nf41_12:mov [ebp+4], ebx
nf41_13:mov [ebp+8], ebx
nf41_14:mov [ebp+12], ebx
add edi, esi
nf41_21:mov [ebp+0], ebx
nf41_22:mov [ebp+4], ebx
nf41_23:mov [ebp+8], ebx
nf41_24:mov [ebp+12], ebx
add edi, esi
nf41_31:mov [ebp+0], ebx
nf41_32:mov [ebp+4], ebx
nf41_33:mov [ebp+8], ebx
nf41_34:mov [ebp+12], ebx
add edi, esi
nf41_41:mov [ebp+0], ebx
nf41_42:mov [ebp+4], ebx
nf41_43:mov [ebp+8], ebx
nf41_44:mov [ebp+12], ebx
add edi, esi
nf41_51:mov [ebp+0], ebx
nf41_52:mov [ebp+4], ebx
nf41_53:mov [ebp+8], ebx
nf41_54:mov [ebp+12], ebx
add edi, esi
nf41_61:mov [ebp+0], ebx
nf41_62:mov [ebp+4], ebx
nf41_63:mov [ebp+8], ebx
nf41_64:mov [ebp+12], ebx
add edi, esi
nf41_71:mov [ebp+0], ebx
nf41_72:mov [ebp+4], ebx
nf41_73:mov [ebp+8], ebx
nf41_74:mov [ebp+12], ebx
add edi, esi
nf41_81:mov [ebp+0], ebx
nf41_82:mov [ebp+4], ebx
nf41_83:mov [ebp+8], ebx
nf41_84:mov [ebp+12], ebx
pop esi
pop ebp
add esi, 16
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf9+48
nf57: ; low 8x4x2 (16 bytes)
if 0 ;debug
add esi, 16
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov4
lea edx, byte ptr ds:nf57_11+2
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_11-nf57_11)], bh
mov [edx+(nf57_12-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_13-nf57_11)], bh
mov [edx+(nf57_14-nf57_11)], bl
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_15-nf57_11)], bh
mov [edx+(nf57_16-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_17-nf57_11)], bh
mov [edx+(nf57_18-nf57_11)], bl
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_21-nf57_11)], bh
mov [edx+(nf57_22-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_23-nf57_11)], bh
mov [edx+(nf57_24-nf57_11)], bl
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_25-nf57_11)], bh
mov [edx+(nf57_26-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_27-nf57_11)], bh
mov [edx+(nf57_28-nf57_11)], bl
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_31-nf57_11)], bh
mov [edx+(nf57_32-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_33-nf57_11)], bh
mov [edx+(nf57_34-nf57_11)], bl
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_35-nf57_11)], bh
mov [edx+(nf57_36-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_37-nf57_11)], bh
mov [edx+(nf57_38-nf57_11)], bl
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_41-nf57_11)], bh
mov [edx+(nf57_42-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_43-nf57_11)], bh
mov [edx+(nf57_44-nf57_11)], bl
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf57_45-nf57_11)], bh
mov [edx+(nf57_46-nf57_11)], bl
shr ebx, 16
mov [edx+(nf57_47-nf57_11)], bh
mov [edx+(nf57_48-nf57_11)], bl
push ebp
push esi
; Load bx,dx,cx,bp with four colors
if TRANS16
Trans16 bx, esi, 1
Trans16 dx, esi+2
Trans16 cx, esi+4, 1
Trans16 bp, esi+6
else
mov bx, [esi]
and ebx, 07fffh
mov dx, [esi+2]
mov cx, [esi+4]
and ecx, 07fffh
mov bp, [esi+6]
endif
mov esi, nf_width
jmp nf57_0 ; flush prefetch
ALIGN 4
nf57_0:
nf57_11:mov ax, bx
shl eax, 16
nf57_12:mov ax, bx
mov [edi], eax
mov [edi+esi], eax
nf57_13:mov ax, bx
shl eax, 16
nf57_14:mov ax, bx
mov [edi+4], eax
mov [edi+esi+4], eax
nf57_15:mov ax, bx
shl eax, 16
nf57_16:mov ax, bx
mov [edi+8], eax
mov [edi+esi+8], eax
nf57_17:mov ax, bx
shl eax, 16
nf57_18:mov ax, bx
mov [edi+12], eax
mov [edi+esi+12], eax
lea edi, [edi+esi*2]
nf57_21:mov ax, bx
shl eax, 16
nf57_22:mov ax, bx
mov [edi], eax
mov [edi+esi], eax
nf57_23:mov ax, bx
shl eax, 16
nf57_24:mov ax, bx
mov [edi+4], eax
mov [edi+esi+4], eax
nf57_25:mov ax, bx
shl eax, 16
nf57_26:mov ax, bx
mov [edi+8], eax
mov [edi+esi+8], eax
nf57_27:mov ax, bx
shl eax, 16
nf57_28:mov ax, bx
mov [edi+12], eax
mov [edi+esi+12], eax
lea edi, [edi+esi*2]
nf57_31:mov ax, bx
shl eax, 16
nf57_32:mov ax, bx
mov [edi], eax
mov [edi+esi], eax
nf57_33:mov ax, bx
shl eax, 16
nf57_34:mov ax, bx
mov [edi+4], eax
mov [edi+esi+4], eax
nf57_35:mov ax, bx
shl eax, 16
nf57_36:mov ax, bx
mov [edi+8], eax
mov [edi+esi+8], eax
nf57_37:mov ax, bx
shl eax, 16
nf57_38:mov ax, bx
mov [edi+12], eax
mov [edi+esi+12], eax
lea edi, [edi+esi*2]
nf57_41:mov ax, bx
shl eax, 16
nf57_42:mov ax, bx
mov [edi], eax
mov [edi+esi], eax
nf57_43:mov ax, bx
shl eax, 16
nf57_44:mov ax, bx
mov [edi+4], eax
mov [edi+esi+4], eax
nf57_45:mov ax, bx
shl eax, 16
nf57_46:mov ax, bx
mov [edi+8], eax
mov [edi+esi+8], eax
nf57_47:mov ax, bx
shl eax, 16
nf57_48:mov ax, bx
mov [edi+12], eax
mov [edi+esi+12], eax
add edi, esi
pop esi
pop ebp
add esi, 16
sub edi, nfpk_back_right
retn
;----------------------------------------
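; nf10/nf26/nf42 carry more than one 4-color set: the saved source pointer on
; the stack ([esp]) is re-read partway through so each quadrant or half of the
; square is drawn with its own four colors from later in the parameter block.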
ALIGN 4
nf10: ; 2x2 4x4x2 (48 bytes)
test word ptr [esi], 08000h
jnz nf26
if 0 ;debug
add esi, 48
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov4
lea edx, byte ptr ds:nf10_11+2
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_11-nf10_11)], bh
mov [edx+(nf10_12-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_13-nf10_11)], bh
mov [edx+(nf10_14-nf10_11)], bl
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_15-nf10_11)], bh
mov [edx+(nf10_16-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_17-nf10_11)], bh
mov [edx+(nf10_18-nf10_11)], bl
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_21-nf10_11)], bh
mov [edx+(nf10_22-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_23-nf10_11)], bh
mov [edx+(nf10_24-nf10_11)], bl
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_25-nf10_11)], bh
mov [edx+(nf10_26-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_27-nf10_11)], bh
mov [edx+(nf10_28-nf10_11)], bl
mov al, [esi+20]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_31-nf10_11)], bh
mov [edx+(nf10_32-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_33-nf10_11)], bh
mov [edx+(nf10_34-nf10_11)], bl
mov al, [esi+21]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_35-nf10_11)], bh
mov [edx+(nf10_36-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_37-nf10_11)], bh
mov [edx+(nf10_38-nf10_11)], bl
mov al, [esi+22]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_41-nf10_11)], bh
mov [edx+(nf10_42-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_43-nf10_11)], bh
mov [edx+(nf10_44-nf10_11)], bl
mov al, [esi+23]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_45-nf10_11)], bh
mov [edx+(nf10_46-nf10_11)], bl
shr ebx, 16
mov [edx+(nf10_47-nf10_11)], bh
mov [edx+(nf10_48-nf10_11)], bl
lea edx, [edx+(nf10_51-nf10_11)]
mov al, [esi+32]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_51-nf10_51)], bh
mov [edx+(nf10_52-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_53-nf10_51)], bh
mov [edx+(nf10_54-nf10_51)], bl
mov al, [esi+33]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_55-nf10_51)], bh
mov [edx+(nf10_56-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_57-nf10_51)], bh
mov [edx+(nf10_58-nf10_51)], bl
mov al, [esi+34]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_61-nf10_51)], bh
mov [edx+(nf10_62-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_63-nf10_51)], bh
mov [edx+(nf10_64-nf10_51)], bl
mov al, [esi+35]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_65-nf10_51)], bh
mov [edx+(nf10_66-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_67-nf10_51)], bh
mov [edx+(nf10_68-nf10_51)], bl
mov al, [esi+44]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_71-nf10_51)], bh
mov [edx+(nf10_72-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_73-nf10_51)], bh
mov [edx+(nf10_74-nf10_51)], bl
mov al, [esi+45]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_75-nf10_51)], bh
mov [edx+(nf10_76-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_77-nf10_51)], bh
mov [edx+(nf10_78-nf10_51)], bl
mov al, [esi+46]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_81-nf10_51)], bh
mov [edx+(nf10_82-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_83-nf10_51)], bh
mov [edx+(nf10_84-nf10_51)], bl
mov al, [esi+47]
mov ebx, [ecx+eax*4]
mov [edx+(nf10_85-nf10_51)], bh
mov [edx+(nf10_86-nf10_51)], bl
shr ebx, 16
mov [edx+(nf10_87-nf10_51)], bh
mov [edx+(nf10_88-nf10_51)], bl
push ebp
push esi
; Load bx,dx,cx,bp with four colors
if TRANS16
Trans16 bx, esi
Trans16 dx, esi+2
Trans16 cx, esi+4
Trans16 bp, esi+6
else
mov bx, [esi]
mov dx, [esi+2]
mov cx, [esi+4]
mov bp, [esi+6]
endif
mov esi, nf_width
jmp nf10_0 ; flush prefetch
ALIGN 4
nf10_0:
nf10_11:mov ax, bx
shl eax, 16
nf10_12:mov ax, bx
mov [edi], eax
nf10_13:mov ax, bx
shl eax, 16
nf10_14:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_15:mov ax, bx
shl eax, 16
nf10_16:mov ax, bx
mov [edi], eax
nf10_17:mov ax, bx
shl eax, 16
nf10_18:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_21:mov ax, bx
shl eax, 16
nf10_22:mov ax, bx
mov [edi], eax
nf10_23:mov ax, bx
shl eax, 16
nf10_24:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_25:mov ax, bx
shl eax, 16
nf10_26:mov ax, bx
mov [edi], eax
nf10_27:mov ax, bx
shl eax, 16
nf10_28:mov ax, bx
mov [edi+4], eax
add edi, esi
; Load bx,dx,cx,bp with four colors
if TRANS16
mov esi, [esp]
Trans16 bx, esi+12
Trans16 dx, esi+14
Trans16 cx, esi+16
Trans16 bp, esi+18
mov esi, nf_width
else
mov eax, [esp]
mov bx, [eax+12]
mov dx, [eax+14]
mov cx, [eax+16]
mov bp, [eax+18]
endif
nf10_31:mov ax, bx
shl eax, 16
nf10_32:mov ax, bx
mov [edi], eax
nf10_33:mov ax, bx
shl eax, 16
nf10_34:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_35:mov ax, bx
shl eax, 16
nf10_36:mov ax, bx
mov [edi], eax
nf10_37:mov ax, bx
shl eax, 16
nf10_38:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_41:mov ax, bx
shl eax, 16
nf10_42:mov ax, bx
mov [edi], eax
nf10_43:mov ax, bx
shl eax, 16
nf10_44:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_45:mov ax, bx
shl eax, 16
nf10_46:mov ax, bx
mov [edi], eax
nf10_47:mov ax, bx
shl eax, 16
nf10_48:mov ax, bx
mov [edi+4], eax
add edi, esi
lea eax, [esi*8-8]
sub edi, eax
; Load bx,dx,cx,bp with four colors
if TRANS16
mov esi, [esp]
Trans16 bx, esi+24
Trans16 dx, esi+26
Trans16 cx, esi+28
Trans16 bp, esi+30
mov esi, nf_width
else
mov eax, [esp]
mov bx, [eax+24]
mov dx, [eax+26]
mov cx, [eax+28]
mov bp, [eax+30]
endif
nf10_51:mov ax, bx
shl eax, 16
nf10_52:mov ax, bx
mov [edi], eax
nf10_53:mov ax, bx
shl eax, 16
nf10_54:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_55:mov ax, bx
shl eax, 16
nf10_56:mov ax, bx
mov [edi], eax
nf10_57:mov ax, bx
shl eax, 16
nf10_58:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_61:mov ax, bx
shl eax, 16
nf10_62:mov ax, bx
mov [edi], eax
nf10_63:mov ax, bx
shl eax, 16
nf10_64:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_65:mov ax, bx
shl eax, 16
nf10_66:mov ax, bx
mov [edi], eax
nf10_67:mov ax, bx
shl eax, 16
nf10_68:mov ax, bx
mov [edi+4], eax
add edi, esi
; Load bx,dx,cx,bp with four colors
if TRANS16
mov esi, [esp]
Trans16 bx, esi+36
Trans16 dx, esi+38
Trans16 cx, esi+40
Trans16 bp, esi+42
mov esi, nf_width
else
mov eax, [esp]
mov bx, [eax+36]
mov dx, [eax+38]
mov cx, [eax+40]
mov bp, [eax+42]
endif
nf10_71:mov ax, bx
shl eax, 16
nf10_72:mov ax, bx
mov [edi], eax
nf10_73:mov ax, bx
shl eax, 16
nf10_74:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_75:mov ax, bx
shl eax, 16
nf10_76:mov ax, bx
mov [edi], eax
nf10_77:mov ax, bx
shl eax, 16
nf10_78:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_81:mov ax, bx
shl eax, 16
nf10_82:mov ax, bx
mov [edi], eax
nf10_83:mov ax, bx
shl eax, 16
nf10_84:mov ax, bx
mov [edi+4], eax
add edi, esi
nf10_85:mov ax, bx
shl eax, 16
nf10_86:mov ax, bx
mov [edi], eax
nf10_87:mov ax, bx
shl eax, 16
nf10_88:mov ax, bx
mov [edi+4], eax
pop esi
pop ebp
add esi, 48
sub edi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+16
nf26: ; 2x1 4x8x2 (32 bytes)
test word ptr [esi+16], 08000h
jnz nf42
if 0 ;debug
add esi, 32
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov4
lea edx, byte ptr ds:nf26_11+2
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_11-nf26_11)], bh
mov [edx+(nf26_12-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_13-nf26_11)], bh
mov [edx+(nf26_14-nf26_11)], bl
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_15-nf26_11)], bh
mov [edx+(nf26_16-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_17-nf26_11)], bh
mov [edx+(nf26_18-nf26_11)], bl
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_21-nf26_11)], bh
mov [edx+(nf26_22-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_23-nf26_11)], bh
mov [edx+(nf26_24-nf26_11)], bl
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_25-nf26_11)], bh
mov [edx+(nf26_26-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_27-nf26_11)], bh
mov [edx+(nf26_28-nf26_11)], bl
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_31-nf26_11)], bh
mov [edx+(nf26_32-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_33-nf26_11)], bh
mov [edx+(nf26_34-nf26_11)], bl
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_35-nf26_11)], bh
mov [edx+(nf26_36-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_37-nf26_11)], bh
mov [edx+(nf26_38-nf26_11)], bl
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_41-nf26_11)], bh
mov [edx+(nf26_42-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_43-nf26_11)], bh
mov [edx+(nf26_44-nf26_11)], bl
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_45-nf26_11)], bh
mov [edx+(nf26_46-nf26_11)], bl
shr ebx, 16
mov [edx+(nf26_47-nf26_11)], bh
mov [edx+(nf26_48-nf26_11)], bl
lea edx, [edx+(nf26_51-nf26_11)]
mov al, [esi+24]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_51-nf26_51)], bh
mov [edx+(nf26_52-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_53-nf26_51)], bh
mov [edx+(nf26_54-nf26_51)], bl
mov al, [esi+25]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_55-nf26_51)], bh
mov [edx+(nf26_56-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_57-nf26_51)], bh
mov [edx+(nf26_58-nf26_51)], bl
mov al, [esi+26]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_61-nf26_51)], bh
mov [edx+(nf26_62-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_63-nf26_51)], bh
mov [edx+(nf26_64-nf26_51)], bl
mov al, [esi+27]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_65-nf26_51)], bh
mov [edx+(nf26_66-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_67-nf26_51)], bh
mov [edx+(nf26_68-nf26_51)], bl
mov al, [esi+28]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_71-nf26_51)], bh
mov [edx+(nf26_72-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_73-nf26_51)], bh
mov [edx+(nf26_74-nf26_51)], bl
mov al, [esi+29]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_75-nf26_51)], bh
mov [edx+(nf26_76-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_77-nf26_51)], bh
mov [edx+(nf26_78-nf26_51)], bl
mov al, [esi+30]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_81-nf26_51)], bh
mov [edx+(nf26_82-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_83-nf26_51)], bh
mov [edx+(nf26_84-nf26_51)], bl
mov al, [esi+31]
mov ebx, [ecx+eax*4]
mov [edx+(nf26_85-nf26_51)], bh
mov [edx+(nf26_86-nf26_51)], bl
shr ebx, 16
mov [edx+(nf26_87-nf26_51)], bh
mov [edx+(nf26_88-nf26_51)], bl
push ebp
push esi
; Load bx,dx,cx,bp with four colors
if TRANS16
Trans16 bx, esi, 1
Trans16 dx, esi+2
Trans16 cx, esi+4
Trans16 bp, esi+6
else
mov bx, [esi]
and ebx, 07fffh
mov dx, [esi+2]
mov cx, [esi+4]
mov bp, [esi+6]
endif
mov esi, nf_width
jmp nf26_0 ; flush prefetch
ALIGN 4
nf26_0:
nf26_11:mov ax, bx
shl eax, 16
nf26_12:mov ax, bx
mov [edi], eax
nf26_13:mov ax, bx
shl eax, 16
nf26_14:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_15:mov ax, bx
shl eax, 16
nf26_16:mov ax, bx
mov [edi], eax
nf26_17:mov ax, bx
shl eax, 16
nf26_18:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_21:mov ax, bx
shl eax, 16
nf26_22:mov ax, bx
mov [edi], eax
nf26_23:mov ax, bx
shl eax, 16
nf26_24:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_25:mov ax, bx
shl eax, 16
nf26_26:mov ax, bx
mov [edi], eax
nf26_27:mov ax, bx
shl eax, 16
nf26_28:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_31:mov ax, bx
shl eax, 16
nf26_32:mov ax, bx
mov [edi], eax
nf26_33:mov ax, bx
shl eax, 16
nf26_34:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_35:mov ax, bx
shl eax, 16
nf26_36:mov ax, bx
mov [edi], eax
nf26_37:mov ax, bx
shl eax, 16
nf26_38:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_41:mov ax, bx
shl eax, 16
nf26_42:mov ax, bx
mov [edi], eax
nf26_43:mov ax, bx
shl eax, 16
nf26_44:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_45:mov ax, bx
shl eax, 16
nf26_46:mov ax, bx
mov [edi], eax
nf26_47:mov ax, bx
shl eax, 16
nf26_48:mov ax, bx
mov [edi+4], eax
add edi, esi
lea eax, [esi*8-8]
sub edi, eax
; Load bx,dx,cx,bp with four colors
if TRANS16
mov esi, [esp]
Trans16 bx, esi+16
Trans16 dx, esi+18
Trans16 cx, esi+20
Trans16 bp, esi+22
mov esi, nf_width
else
mov eax, [esp]
mov bx, [eax+16]
mov dx, [eax+18]
mov cx, [eax+20]
mov bp, [eax+22]
endif
nf26_51:mov ax, bx
shl eax, 16
nf26_52:mov ax, bx
mov [edi], eax
nf26_53:mov ax, bx
shl eax, 16
nf26_54:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_55:mov ax, bx
shl eax, 16
nf26_56:mov ax, bx
mov [edi], eax
nf26_57:mov ax, bx
shl eax, 16
nf26_58:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_61:mov ax, bx
shl eax, 16
nf26_62:mov ax, bx
mov [edi], eax
nf26_63:mov ax, bx
shl eax, 16
nf26_64:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_65:mov ax, bx
shl eax, 16
nf26_66:mov ax, bx
mov [edi], eax
nf26_67:mov ax, bx
shl eax, 16
nf26_68:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_71:mov ax, bx
shl eax, 16
nf26_72:mov ax, bx
mov [edi], eax
nf26_73:mov ax, bx
shl eax, 16
nf26_74:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_75:mov ax, bx
shl eax, 16
nf26_76:mov ax, bx
mov [edi], eax
nf26_77:mov ax, bx
shl eax, 16
nf26_78:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_81:mov ax, bx
shl eax, 16
nf26_82:mov ax, bx
mov [edi], eax
nf26_83:mov ax, bx
shl eax, 16
nf26_84:mov ax, bx
mov [edi+4], eax
add edi, esi
nf26_85:mov ax, bx
shl eax, 16
nf26_86:mov ax, bx
mov [edi], eax
nf26_87:mov ax, bx
shl eax, 16
nf26_88:mov ax, bx
mov [edi+4], eax
pop esi
pop ebp
add esi, 32
sub edi, 8
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
;nf10+32
nf42: ; 1x2 8x4x2 (32 bytes)
if 0 ;debug
add esi, 32
mov ebx, 0
jmp nf_solid
endif
xor eax, eax
lea ecx, nfhpk_mov4
lea edx, byte ptr ds:nf42_11+2
mov al, [esi+8]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_11-nf42_11)], bh
mov [edx+(nf42_12-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_13-nf42_11)], bh
mov [edx+(nf42_14-nf42_11)], bl
mov al, [esi+9]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_15-nf42_11)], bh
mov [edx+(nf42_16-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_17-nf42_11)], bh
mov [edx+(nf42_18-nf42_11)], bl
mov al, [esi+10]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_21-nf42_11)], bh
mov [edx+(nf42_22-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_23-nf42_11)], bh
mov [edx+(nf42_24-nf42_11)], bl
mov al, [esi+11]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_25-nf42_11)], bh
mov [edx+(nf42_26-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_27-nf42_11)], bh
mov [edx+(nf42_28-nf42_11)], bl
mov al, [esi+12]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_31-nf42_11)], bh
mov [edx+(nf42_32-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_33-nf42_11)], bh
mov [edx+(nf42_34-nf42_11)], bl
mov al, [esi+13]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_35-nf42_11)], bh
mov [edx+(nf42_36-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_37-nf42_11)], bh
mov [edx+(nf42_38-nf42_11)], bl
mov al, [esi+14]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_41-nf42_11)], bh
mov [edx+(nf42_42-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_43-nf42_11)], bh
mov [edx+(nf42_44-nf42_11)], bl
mov al, [esi+15]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_45-nf42_11)], bh
mov [edx+(nf42_46-nf42_11)], bl
shr ebx, 16
mov [edx+(nf42_47-nf42_11)], bh
mov [edx+(nf42_48-nf42_11)], bl
lea edx, [edx+(nf42_51-nf42_11)]
mov al, [esi+24]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_51-nf42_51)], bh
mov [edx+(nf42_52-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_53-nf42_51)], bh
mov [edx+(nf42_54-nf42_51)], bl
mov al, [esi+25]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_55-nf42_51)], bh
mov [edx+(nf42_56-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_57-nf42_51)], bh
mov [edx+(nf42_58-nf42_51)], bl
mov al, [esi+26]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_61-nf42_51)], bh
mov [edx+(nf42_62-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_63-nf42_51)], bh
mov [edx+(nf42_64-nf42_51)], bl
mov al, [esi+27]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_65-nf42_51)], bh
mov [edx+(nf42_66-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_67-nf42_51)], bh
mov [edx+(nf42_68-nf42_51)], bl
mov al, [esi+28]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_71-nf42_51)], bh
mov [edx+(nf42_72-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_73-nf42_51)], bh
mov [edx+(nf42_74-nf42_51)], bl
mov al, [esi+29]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_75-nf42_51)], bh
mov [edx+(nf42_76-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_77-nf42_51)], bh
mov [edx+(nf42_78-nf42_51)], bl
mov al, [esi+30]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_81-nf42_51)], bh
mov [edx+(nf42_82-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_83-nf42_51)], bh
mov [edx+(nf42_84-nf42_51)], bl
mov al, [esi+31]
mov ebx, [ecx+eax*4]
mov [edx+(nf42_85-nf42_51)], bh
mov [edx+(nf42_86-nf42_51)], bl
shr ebx, 16
mov [edx+(nf42_87-nf42_51)], bh
mov [edx+(nf42_88-nf42_51)], bl
push ebp
push esi
; Load bx,dx,cx,bp with four colors
if TRANS16
Trans16 bx, esi, 1
Trans16 dx, esi+2
Trans16 cx, esi+4
Trans16 bp, esi+6
else
mov bx, [esi]
and ebx, 07fffh
mov dx, [esi+2]
mov cx, [esi+4]
mov bp, [esi+6]
endif
mov esi, nf_width
jmp nf42_0 ; flush prefetch
ALIGN 4
nf42_0:
nf42_11:mov ax, bx
shl eax, 16
nf42_12:mov ax, bx
mov [edi], eax
nf42_13:mov ax, bx
shl eax, 16
nf42_14:mov ax, bx
mov [edi+4], eax
nf42_15:mov ax, bx
shl eax, 16
nf42_16:mov ax, bx
mov [edi+8], eax
nf42_17:mov ax, bx
shl eax, 16
nf42_18:mov ax, bx
mov [edi+12], eax
add edi, esi
nf42_21:mov ax, bx
shl eax, 16
nf42_22:mov ax, bx
mov [edi], eax
nf42_23:mov ax, bx
shl eax, 16
nf42_24:mov ax, bx
mov [edi+4], eax
nf42_25:mov ax, bx
shl eax, 16
nf42_26:mov ax, bx
mov [edi+8], eax
nf42_27:mov ax, bx
shl eax, 16
nf42_28:mov ax, bx
mov [edi+12], eax
add edi, esi
nf42_31:mov ax, bx
shl eax, 16
nf42_32:mov ax, bx
mov [edi], eax
nf42_33:mov ax, bx
shl eax, 16
nf42_34:mov ax, bx
mov [edi+4], eax
nf42_35:mov ax, bx
shl eax, 16
nf42_36:mov ax, bx
mov [edi+8], eax
nf42_37:mov ax, bx
shl eax, 16
nf42_38:mov ax, bx
mov [edi+12], eax
add edi, esi
nf42_41:mov ax, bx
shl eax, 16
nf42_42:mov ax, bx
mov [edi], eax
nf42_43:mov ax, bx
shl eax, 16
nf42_44:mov ax, bx
mov [edi+4], eax
nf42_45:mov ax, bx
shl eax, 16
nf42_46:mov ax, bx
mov [edi+8], eax
nf42_47:mov ax, bx
shl eax, 16
nf42_48:mov ax, bx
mov [edi+12], eax
add edi, esi
; Load bx,dx,cx,bp with four colors
if TRANS16
mov esi, [esp]
Trans16 bx, esi+16, 1
Trans16 dx, esi+18
Trans16 cx, esi+20
Trans16 bp, esi+22
mov esi, nf_width
else
mov eax, [esp]
mov bx, [eax+16]
and ebx, 07fffh
mov dx, [eax+18]
mov cx, [eax+20]
mov bp, [eax+22]
endif
nf42_51:mov ax, bx
shl eax, 16
nf42_52:mov ax, bx
mov [edi], eax
nf42_53:mov ax, bx
shl eax, 16
nf42_54:mov ax, bx
mov [edi+4], eax
nf42_55:mov ax, bx
shl eax, 16
nf42_56:mov ax, bx
mov [edi+8], eax
nf42_57:mov ax, bx
shl eax, 16
nf42_58:mov ax, bx
mov [edi+12], eax
add edi, esi
nf42_61:mov ax, bx
shl eax, 16
nf42_62:mov ax, bx
mov [edi], eax
nf42_63:mov ax, bx
shl eax, 16
nf42_64:mov ax, bx
mov [edi+4], eax
nf42_65:mov ax, bx
shl eax, 16
nf42_66:mov ax, bx
mov [edi+8], eax
nf42_67:mov ax, bx
shl eax, 16
nf42_68:mov ax, bx
mov [edi+12], eax
add edi, esi
nf42_71:mov ax, bx
shl eax, 16
nf42_72:mov ax, bx
mov [edi], eax
nf42_73:mov ax, bx
shl eax, 16
nf42_74:mov ax, bx
mov [edi+4], eax
nf42_75:mov ax, bx
shl eax, 16
nf42_76:mov ax, bx
mov [edi+8], eax
nf42_77:mov ax, bx
shl eax, 16
nf42_78:mov ax, bx
mov [edi+12], eax
add edi, esi
nf42_81:mov ax, bx
shl eax, 16
nf42_82:mov ax, bx
mov [edi], eax
nf42_83:mov ax, bx
shl eax, 16
nf42_84:mov ax, bx
mov [edi+4], eax
nf42_85:mov ax, bx
shl eax, 16
nf42_86:mov ax, bx
mov [edi+8], eax
nf42_87:mov ax, bx
shl eax, 16
nf42_88:mov ax, bx
mov [edi+12], eax
pop esi
pop ebp
add esi, 32
sub edi, nfpk_back_right
retn
;----------------------------------------
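; nf11 is a literal 8x8 square of 16-bit pixels: with TRANS16 every word is
; passed through the Trans16 macro (16-bit RGB format translation) on its way
; to the destination; otherwise the 128 bytes are copied a dword at a time.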
ALIGN 4
nf11: ; 8x8x16 (128 bytes)
if 0 ;debug
add esi, 128
mov ebx, 0
jmp nf_solid
endif
mov edx, nf_width
if TRANS16
Trans16Blk MACRO idx
Trans16 bx, idx
mov [edi], bx
Trans16 bx, idx+2
mov [edi+2], bx
Trans16 bx, idx+4
mov [edi+4], bx
Trans16 bx, idx+6
mov [edi+6], bx
Trans16 bx, idx+8
mov [edi+8], bx
Trans16 bx, idx+10
mov [edi+10], bx
Trans16 bx, idx+12
mov [edi+12], bx
Trans16 bx, idx+14
mov [edi+14], bx
ENDM
Trans16Blk esi ;0
add edi, edx
Trans16Blk esi+16 ;1
add edi, edx
Trans16Blk esi+32 ;2
add edi, edx
Trans16Blk esi+48 ;3
add edi, edx
Trans16Blk esi+64 ;4
add edi, edx
Trans16Blk esi+80 ;5
add edi, edx
Trans16Blk esi+96 ;6
add edi, edx
Trans16Blk esi+112 ;7
else
mov eax, [esi] ;0
mov [edi], eax
mov eax, [esi+4]
mov [edi+4], eax
mov eax, [esi+8]
mov [edi+8], eax
mov eax, [esi+12]
mov [edi+12], eax
add edi, edx
mov eax, [esi+16] ;1
mov [edi], eax
mov eax, [esi+20]
mov [edi+4], eax
mov eax, [esi+24]
mov [edi+8], eax
mov eax, [esi+28]
mov [edi+12], eax
add edi, edx
mov eax, [esi+32] ;2
mov [edi], eax
mov eax, [esi+36]
mov [edi+4], eax
mov eax, [esi+40]
mov [edi+8], eax
mov eax, [esi+44]
mov [edi+12], eax
add edi, edx
mov eax, [esi+48] ;3
mov [edi], eax
mov eax, [esi+52]
mov [edi+4], eax
mov eax, [esi+56]
mov [edi+8], eax
mov eax, [esi+60]
mov [edi+12], eax
add edi, edx
mov eax, [esi+64] ;4
mov [edi], eax
mov eax, [esi+68]
mov [edi+4], eax
mov eax, [esi+72]
mov [edi+8], eax
mov eax, [esi+76]
mov [edi+12], eax
add edi, edx
mov eax, [esi+80] ;5
mov [edi], eax
mov eax, [esi+84]
mov [edi+4], eax
mov eax, [esi+88]
mov [edi+8], eax
mov eax, [esi+92]
mov [edi+12], eax
add edi, edx
mov eax, [esi+96] ;6
mov [edi], eax
mov eax, [esi+100]
mov [edi+4], eax
mov eax, [esi+104]
mov [edi+8], eax
mov eax, [esi+108]
mov [edi+12], eax
add edi, edx
mov eax, [esi+112] ;7
mov [edi], eax
mov eax, [esi+116]
mov [edi+4], eax
mov eax, [esi+120]
mov [edi+8], eax
mov eax, [esi+124]
mov [edi+12], eax
endif
add esi, 128
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
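; nf12 is a quarter-resolution square: sixteen 16-bit colors, each expanded to
; a 2x2 block (doubled horizontally via the shrd/mov pair and vertically by
; also storing to [edi+edx]).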
ALIGN 4
nf12: ; low 4x4x16 (32 bytes)
if 0 ;debug
add esi, 32
mov ebx, 0
jmp nf_solid
endif
mov edx, nf_width
Trans16 bx, esi
shrd eax, ebx, 16
mov ax, bx
mov [edi], eax
mov [edi+edx], eax
Trans16 bx, esi+2
shrd eax, ebx, 16
mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
Trans16 bx, esi+4
shrd eax, ebx, 16
mov ax, bx
mov [edi+8], eax
mov [edi+edx+8], eax
Trans16 bx, esi+6
shrd eax, ebx, 16
mov ax, bx
mov [edi+12], eax
mov [edi+edx+12], eax
lea edi, [edi+edx*2]
Trans16 bx, esi+8
shrd eax, ebx, 16
mov ax, bx
mov [edi], eax
mov [edi+edx], eax
Trans16 bx, esi+10
shrd eax, ebx, 16
mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
Trans16 bx, esi+12
shrd eax, ebx, 16
mov ax, bx
mov [edi+8], eax
mov [edi+edx+8], eax
Trans16 bx, esi+14
shrd eax, ebx, 16
mov ax, bx
mov [edi+12], eax
mov [edi+edx+12], eax
lea edi, [edi+edx*2]
Trans16 bx, esi+16
shrd eax, ebx, 16
mov ax, bx
mov [edi], eax
mov [edi+edx], eax
Trans16 bx, esi+18
shrd eax, ebx, 16
mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
Trans16 bx, esi+20
shrd eax, ebx, 16
mov ax, bx
mov [edi+8], eax
mov [edi+edx+8], eax
Trans16 bx, esi+22
shrd eax, ebx, 16
mov ax, bx
mov [edi+12], eax
mov [edi+edx+12], eax
lea edi, [edi+edx*2]
Trans16 bx, esi+24
shrd eax, ebx, 16
mov ax, bx
mov [edi], eax
mov [edi+edx], eax
Trans16 bx, esi+26
shrd eax, ebx, 16
mov ax, bx
mov [edi+4], eax
mov [edi+edx+4], eax
Trans16 bx, esi+28
shrd eax, ebx, 16
mov ax, bx
mov [edi+8], eax
mov [edi+edx+8], eax
Trans16 bx, esi+30
shrd eax, ebx, 16
mov ax, bx
mov [edi+12], eax
mov [edi+edx+12], eax
add edi, edx
sub edi, nfpk_back_right
add esi, 32
retn
;----------------------------------------
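; nf13 carries just four 16-bit colors, one per 4x4 quadrant of the square.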
ALIGN 4
nf13: ; 2x2 4x4x0 (8 bytes)
if 0 ;debug
add esi, 8
mov ebx, 0
jmp nf_solid
endif
mov edx, nf_width
if TRANS16
Trans16 cx, esi
shrd ebx, ecx, 16
mov bx, cx
Trans16 cx, esi+2
shrd eax, ecx, 16
mov ax, cx
mov ecx, eax
else
mov ax, [esi]
shrd ebx, eax, 16
mov bx, ax
mov ax, [esi+2]
shrd ecx, eax, 16
mov cx, ax
endif
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ecx
mov [edi+12], ecx
mov [edi+edx], ebx
mov [edi+edx+4], ebx
mov [edi+edx+8], ecx
mov [edi+edx+12], ecx
lea edi, [edi+edx*2]
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ecx
mov [edi+12], ecx
mov [edi+edx], ebx
mov [edi+edx+4], ebx
mov [edi+edx+8], ecx
mov [edi+edx+12], ecx
lea edi, [edi+edx*2]
if TRANS16
Trans16 cx, esi+4
shrd ebx, ecx, 16
mov bx, cx
Trans16 cx, esi+6
shrd eax, ecx, 16
mov ax, cx
mov ecx, eax
else
mov ax, [esi+4]
shrd ebx, eax, 16
mov bx, ax
mov ax, [esi+6]
shrd ecx, eax, 16
mov cx, ax
endif
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ecx
mov [edi+12], ecx
mov [edi+edx], ebx
mov [edi+edx+4], ebx
mov [edi+edx+8], ecx
mov [edi+edx+12], ecx
lea edi, [edi+edx*2]
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ecx
mov [edi+12], ecx
mov [edi+edx], ebx
mov [edi+edx+4], ebx
mov [edi+edx+8], ecx
mov [edi+edx+12], ecx
add edi, edx
sub edi, nfpk_back_right
add esi, 8
retn
;----------------------------------------
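; nf14 fills the whole square with a single 16-bit color; nf_solid is also the
; common tail used by the "if 0 ;debug" stubs in the handlers above.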
ALIGN 4
nf14: ; 8x8x0 (2 bytes)
Trans16 cx, esi
add esi, 2
shrd ebx, ecx, 16
mov bx, cx
nf_solid:
mov edx, nf_width
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ebx
mov [edi+12], ebx
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ebx
mov [edi+12], ebx
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ebx
mov [edi+12], ebx
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ebx
mov [edi+12], ebx
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ebx
mov [edi+12], ebx
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ebx
mov [edi+12], ebx
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ebx
mov [edi+12], ebx
add edi, edx
mov [edi], ebx
mov [edi+4], ebx
mov [edi+8], ebx
mov [edi+12], ebx
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
retn
;----------------------------------------
ALIGN 4
nf15: ; unused
retn
nfHPkDecomp ENDP
endif ; PKDATA
;---------------------------------------------------------------------
; ShowFrame
;------------
EXTERN sf_LineWidth: DWORD ;unsigned sf_LineWidth; // Distance between lines in memory
; Banked screen parameters
EXTERN sf_SetBank: PTRPROC ;unsigned long sf_SetBank;
EXTERN sf_WinGran: DWORD ;unsigned sf_WinGran;
EXTERN sf_WinSize: DWORD ;unsigned long sf_WinSize;
EXTERN sf_WinGranPerSize: DWORD ;unsigned sf_WinGranPerSize;
;{sf_WriteWinPtr and sf_WriteWinLimit replace sf_WriteWinSeg, see mveliba.asm}
EXTERN sf_WriteWinPtr: PTRBYTE ;unsigned char *sf_WriteWinPtr;
EXTERN sf_WriteWinLimit: PTRBYTE ;unsigned char *sf_WriteWinLimit;

EXTERN sf_WriteWin: DWORD ;unsigned sf_WriteWin;
if SCALING
EXTERN opt_hscale_step: DWORD
EXTERN opt_hscale_adj: DWORD
endif
;void mve_ShowFrameField(
; unsigned char *buf, unsigned bufw, unsigned bufh,
; unsigned sx, unsigned sy, unsigned w, unsigned h,
; unsigned dstx, unsigned dsty, unsigned field)
mve_ShowFrameField PROC USES ESI EDI EBX, \
buf:PTRBYTE, bufw:DWORD, bufh:DWORD, \
sx:DWORD, sy:DWORD, w:DWORD, h:DWORD, \
dstx:DWORD, dsty:DWORD, field:DWORD
LOCAL bank:DWORD
LOCAL w4:DWORD
LOCAL new_src_line:DWORD
LOCAL linestep:DWORD
LOCAL new_dst_line:DWORD
mov ax, ds ; Ensure es==ds for Symantec flat mode
mov es, ax
mov eax, w ; w4 = w>>2
shr eax, 2
mov w4, eax
;;; <WIP>
;;; In stretched width mode, we either keep 4/5 (a) of the source pixels,
;;; or duplicate every fourth pixel to magnify by 5/4 (b).
;;; In these cases, new_src_line is either bufw-w*5/4 (a) or bufw-w*4/5 (b).
;;; Let ScaleStep be 5 (a) or 3 (b) instead of 4. This is the amount to advance
;;; the source after copying 32-bits from source to destination.
;;; The coordinate system used for the source will be a simulated scaled system.
;;; Rather than scale height, I plan to use alternate vertical resolutions. However,
;;; it might be a good idea to also provide for scaled height in case we want a
;;; higher resolution border.
;;; Question: Do we still need to support transferring subrectangles?
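;;; Conceptually (C-style sketch, comment only), the opt_hscale_step==3 copy
;;; loops below advance the source by 3 bytes for every 4 destination bytes,
;;; so every third source pixel lands in two adjacent destination pixels
;;; (a 4/3 horizontal stretch).  Per inner iteration, with illustrative
;;; src/dst pointers and assuming w%16==12 as noted at the loops themselves:
;;;     *(unsigned *)(dst+ 0) = *(unsigned *)(src+0);  /* d0..d3   = s0..s3  */
;;;     *(unsigned *)(dst+ 4) = *(unsigned *)(src+3);  /* d4..d7   = s3..s6  */
;;;     *(unsigned *)(dst+ 8) = *(unsigned *)(src+6);  /* d8..d11  = s6..s9  */
;;;     *(unsigned *)(dst+12) = *(unsigned *)(src+9);  /* d12..d15 = s9..s12 */
;;;     src += 12;  dst += 16;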
if SCALING
.if opt_hscale_step==4
endif
mov eax, bufw ; new_src_line = bufw - w
sub eax, w
mov new_src_line, eax
if SCALING
.else
mov eax, opt_hscale_adj
mov new_src_line, eax
.endif
endif
mov eax, sf_LineWidth ; linestep = sf_LineWidth<<1;
.if field ; if (field)
add eax, eax ; linestep <<= 1;
.endif
mov linestep, eax
sub eax, w ; new_dst_line = linestep - w;
mov new_dst_line, eax
mov eax, sy ; buf += sy*bufw + sx
mul bufw
add eax, sx
add buf, eax
mov eax, sx ; dstx += sx
add dstx, eax
; <WIP> This is a hack. We should pass in src x,y of origin
; or make dstx/dsty absolute.
;
mov eax, bufw ; if (field && sx >= (bufw>>1))
shr eax, 1
.if field && sx >= eax
sub dstx, eax ; dstx -= bufw>>1
.endif
mov eax, sy ; dsty += sy
add dsty, eax
.if sf_SetBank==0 ;------------------
; dst = WriteWinPtr + (dsty*linestep+dstx)
mov edi, sf_WriteWinPtr
mov eax, dsty
mul linestep
add eax, dstx
add edi, eax
.if field & 1
add edi, sf_LineWidth;
.endif
mov eax, new_src_line
mov edx, new_dst_line
mov esi, buf
mov ebx, h
if SCALING
.if opt_hscale_step==3
sub edi, 8
sf_lp2a:mov ecx, w4
shr ecx, 2
ALIGN 4
sf_lp2b:mov eax, [esi]
mov [edi+8], eax
mov eax, [esi+3]
mov [edi+12], eax
add edi, 16
mov eax, [esi+6]
mov [edi], eax
mov eax, [esi+9]
mov [edi+4], eax
add esi, 12
dec ecx
jnz sf_lp2b
; To avoid the problem of the last pixel coming from the next
; line, we arrange for w%16==12, so here is where we copy the
; last 12 pixels.
mov eax, [esi]
mov [edi+8], eax
mov eax, [esi+3]
mov [edi+12], eax
add edi, 12
mov eax, [esi+6]
mov [edi+4], eax
add esi, 9
add esi, new_src_line
add edi, edx
dec ebx
jnz sf_lp2a
add edi, 8
.else
endif
sf_lp: mov ecx, w4 ;width/4
rep movsd
add esi, eax
add edi, edx
dec ebx
jnz sf_lp
if SCALING
.endif
endif
.else ; sf_SetBank ;------------------
mov esi, buf
; start = dsty * linestep + dstx
mov eax, linestep
mul dsty
.if field & 1
add eax, sf_LineWidth
.endif
add eax, dstx
; bank = start / WinGran
; dst = (start % WinGran) + sf_WriteWinPtr
mov edx, 0
div sf_WinGran
mov bank, eax
mov edi, edx
add edi, sf_WriteWinPtr
; Select new bank
mov bh, 0
mov bl, byte ptr sf_WriteWin
mov edx, bank
call sf_SetBank
; eax/edx destroyed by sf_SetBank
sf_0: ; rem = sf_WriteWinLimit - dst
mov eax, sf_WriteWinLimit
sub eax, edi
; h2 = (rem+(LineWidth-w))/LineWidth
add eax, linestep
sub eax, w
mov edx, 0
div linestep
; if (h<h2) h2=h
cmp h, eax
jae skplim
mov eax, h
skplim: ; if (h2==0) // No full lines can be transfered.
or eax, eax
jz sf_2
; h-= h2
; Transfer h2 lines to screen
sub h, eax
mov ebx, new_src_line
mov edx, new_dst_line
if SCALING
.if opt_hscale_step==3
sub edi, 8
sf_1a3: mov ecx, w4
shr ecx, 2
ALIGN 4
sf_1b3: mov ebx, [esi]
mov [edi+8], ebx
mov ebx, [esi+3]
mov [edi+12], ebx
add edi, 16
mov ebx, [esi+6]
mov [edi], ebx
mov ebx, [esi+9]
mov [edi+4], ebx
add esi, 12
dec ecx
jnz sf_1b3
; To avoid the problem of the last pixel coming from the next
; line, we arrange for w%16==12, so here is where we copy the
; last 12 pixels.
mov ebx, [esi]
mov [edi+8], ebx
mov ebx, [esi+3]
mov [edi+12], ebx
add edi, 12
mov ebx, [esi+6]
mov [edi+4], ebx
add esi, 9
add esi, new_src_line
add edi, edx
dec eax
jnz sf_1a3
add edi, 8
.else
endif
sf_1: mov ecx, w4 ; width/4
rep movsd
add esi, ebx
add edi, edx
dec eax
jnz sf_1
if SCALING
.endif
endif
sf_2: ; if (h!=0) // There are still lines to be transfered
; // transfer partial line
or eax, h
jz sf_9
; w4a = 0 max (sf_WriteWinLimit-dst)/4
mov ecx, sf_WriteWinLimit
sub ecx, edi
sar ecx, 2
jns sf_2b
mov ecx, 0
sf_2b: push ecx ; Save size of first half
if SCALING
.if opt_hscale_step==3
ALIGN 4
or ecx, ecx
jz sf_2c3
sf_2b3: mov eax, [esi]
mov [edi], eax
add esi, 3
add edi, 4
dec ecx
jnz sf_2b3
sf_2c3:
.else
endif
rep movsd
if SCALING
.endif
endif
; bank += WinSize/WinGran //Assumes WinSize%WinGran==0
; off -= (WinSize/WinGran)*WinGran == WinSize
mov eax, sf_WinGranPerSize
add bank, eax
sub edi, sf_WinSize
; Select new bank
mov bh, 0
mov bl, byte ptr sf_WriteWin
mov edx, bank
call sf_SetBank
; eax/edx destroyed by sf_SetBank
; w4b = w4-w4a // Size of second half
pop eax ; Size of first half
mov ecx, w4
sub ecx, eax ; Size of 2nd half
if SCALING
.if opt_hscale_step==3
ALIGN 4
or ecx, ecx
jz sf_8b3
sf_8a3: mov eax, [esi]
mov [edi], eax
add esi, 3
add edi, 4
dec ecx
jnz sf_8a3
sf_8b3:
.else
endif
rep movsd
if SCALING
.endif
endif
add esi, new_src_line
add edi, new_dst_line
;; --h // Count split line
;; if (h!=0) continue
dec h
jnz sf_0
sf_9:
.endif ; sf_SetBank==0 ;------------------
ret
mve_ShowFrameField ENDP
; Special version of mve_ShowFrameField which converts from
; 256 colors to 15-bit color. Does not support horizontal
; magnification.
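; Conceptually (C-style sketch, comment only), each pass of the conversion
; loops below expands two 8-bit pixels through the 256-entry pal15_tbl and
; writes them with a single 32-bit store (src8/dst16 are illustrative names):
;     dst16[0] = pal15_tbl[src8[0]];
;     dst16[1] = pal15_tbl[src8[1]];
;     src8 += 2;  dst16 += 2;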
;void mve_ShowFrameFieldHi(
; unsigned char *buf, unsigned bufw, unsigned bufh,
; unsigned sx, unsigned sy, unsigned w, unsigned h,
; unsigned dstx, unsigned dsty, unsigned field)
mve_ShowFrameFieldHi PROC USES ESI EDI EBX, \
buf:PTRBYTE, bufw:DWORD, bufh:DWORD, \
sx:DWORD, sy:DWORD, w:DWORD, h:DWORD, \
dstx:DWORD, dsty:DWORD, field:DWORD
LOCAL bank:DWORD
LOCAL w4:DWORD
LOCAL new_src_line:DWORD
LOCAL linestep:DWORD
LOCAL new_dst_line:DWORD
mov ax, ds ; Ensure es==ds for Symantec flat mode
mov es, ax
mov eax, w ; w4 = w>>1
shr eax, 1
mov w4, eax ; Number of dst dwords to transfer
mov eax, bufw ; new_src_line = bufw - w
sub eax, w
mov new_src_line, eax
mov eax, sf_LineWidth ; linestep = sf_LineWidth<<1;
.if field ; if (field)
add eax, eax ; linestep <<= 1;
.endif
mov linestep, eax
sub eax, w ; new_dst_line = linestep - w*2;
sub eax, w
mov new_dst_line, eax
mov eax, sy ; buf += sy*bufw + sx
mul bufw
add eax, sx
add buf, eax
mov eax, sx ; dstx += sx
add dstx, eax
; <WIP> This is a hack. We should pass in src x,y of origin
; or make dstx/dsty absolute.
;
mov eax, bufw ; if (field && sx >= (bufw>>1))
shr eax, 1
.if field && sx >= eax
sub dstx, eax ; dstx -= bufw>>1
.endif
mov eax, sy ; dsty += sy
add dsty, eax
.if sf_SetBank==0 ;------------------
; dst = WriteWinPtr + (dsty*linestep+dstx*2)
mov edi, sf_WriteWinPtr
mov eax, dsty
mul linestep
add eax, dstx
add eax, dstx
add edi, eax
.if field & 1
add edi, sf_LineWidth;
.endif
mov esi, buf
mov ebx, h
sf_lp: mov ecx, w4 ;width/2
push ebx
lea ebx, pal15_tbl
xor eax, eax
sf_movsd1:
mov al, [esi]
add esi, 2
mov dx, [ebx+eax*2]
mov al, [esi-1]
shl edx, 16
mov dx, [ebx+eax*2]
rol edx, 16
mov [edi], edx
add edi, 4
dec ecx
jnz sf_movsd1
pop ebx
; rep movsd ;;;;;-----;;;;;
add esi, new_src_line
add edi, new_dst_line
dec ebx
jnz sf_lp
.else ; sf_SetBank ;------------------
mov esi, buf
; start = dsty * linestep + dstx*2
mov eax, linestep
mul dsty
.if field & 1
add eax, sf_LineWidth
.endif
add eax, dstx
add eax, dstx
; bank = start / WinGran
; dst = (start % WinGran) + sf_WriteWinPtr
mov edx, 0
div sf_WinGran
mov bank, eax
mov edi, edx
add edi, sf_WriteWinPtr
; Select new bank
mov bh, 0
mov bl, byte ptr sf_WriteWin
mov edx, bank
call sf_SetBank
; eax/edx destroyed by sf_SetBank
sf_0: ; rem = sf_WriteWinLimit - dst
mov eax, sf_WriteWinLimit
sub eax, edi
; h2 = (rem+(LineWidth-w*2))/LineWidth
add eax, new_dst_line
mov edx, 0
div linestep
; if (h<h2) h2=h
cmp h, eax
jae skplim
mov eax, h
skplim: ; if (h2==0) // No full lines can be transfered.
or eax, eax
jz sf_2
; h-= h2
; Transfer h2 lines to screen
sub h, eax
lea ebx, pal15_tbl
sf_1: mov ecx, w4 ; width/2
push eax
xor eax, eax
sf_movsd2:
mov al, [esi]
add esi, 2
mov dx, [ebx+eax*2]
mov al, [esi-1]
shl edx, 16
mov dx, [ebx+eax*2]
rol edx, 16
mov [edi], edx
add edi, 4
dec ecx
jnz sf_movsd2
pop eax
; rep movsd ;;;;;-----;;;;;
add esi, new_src_line
add edi, new_dst_line
dec eax
jnz sf_1
sf_2: ; if (h!=0) // There are still lines to be transfered
; // transfer partial line
or eax, h
jz sf_9
; w4a = 0 max (sf_WriteWinLimit-dst)/4
mov ecx, sf_WriteWinLimit
sub ecx, edi
sar ecx, 2
jns sf_2b
mov ecx, 0
sf_2b: push ecx ; Save size of first half
or ecx, ecx
jz sf_movsd3b
xor eax, eax
lea ebx, pal15_tbl
sf_movsd3:
mov al, [esi]
add esi, 2
mov dx, [ebx+eax*2]
mov al, [esi-1]
shl edx, 16
mov dx, [ebx+eax*2]
rol edx, 16
mov [edi], edx
add edi, 4
dec ecx
jnz sf_movsd3
sf_movsd3b:
; rep movsd ;;;;;-----;;;;;
; bank += WinSize/WinGran //Assumes WinSize%WinGran==0
; off -= (WinSize/WinGran)*WinGran == WinSize
mov eax, sf_WinGranPerSize
add bank, eax
sub edi, sf_WinSize
; Select new bank
mov bh, 0
mov bl, byte ptr sf_WriteWin
mov edx, bank
call sf_SetBank
; eax/edx destroyed by sf_SetBank
; w4b = w4-w4a // Size of second half
pop eax ; Size of first half
mov ecx, w4
sub ecx, eax ; Size of 2nd half
or ecx, ecx
jz sf_movsd4b
lea ebx, pal15_tbl
xor eax, eax
sf_movsd4:
mov al, [esi]
add esi, 2
mov dx, [ebx+eax*2]
mov al, [esi-1]
shl edx, 16
mov dx, [ebx+eax*2]
rol edx, 16
mov [edi], edx
add edi, 4
dec ecx
jnz sf_movsd4
sf_movsd4b:
; rep movsd ;;;;;-----;;;;;
add esi, new_src_line
add edi, new_dst_line
;; --h // Count split line
;; if (h!=0) continue
dec h
jnz sf_0
sf_9:
.endif ; sf_SetBank==0 ;------------------
ret
mve_ShowFrameFieldHi ENDP
if PARTIAL
SHOW_FRAME_CHG_BODY MACRO HI_COLOR_FLAG:REQ
LOCAL HI_COLOR_SCALE
HI_COLOR_SCALE equ HI_COLOR_FLAG+1
mov ax, ds ; Ensure es==ds for Symantec flat mode
mov es, ax
mov eax, w ; _width = w*SWIDTH*HI_COLOR_SCALE;
shl eax, LOG2_SWIDTH+HI_COLOR_FLAG
mov _width, eax
xor ebx, ebx ; ebx = nf_fqty (converted to 32-bits)
mov bl, nf_fqty
mov eax, nf_width ; SrcWidth = nf_width*nf_fqty;
mul ebx ;nf_fqty
mov SrcWidth, eax
imul eax, (SHEIGHT-1) ; SrcWidth7 = SrcWidth * (SHEIGHT-1)
mov SrcWidth7, eax
add eax, SrcWidth ; SrcLineStep = SrcWidth*SHEIGHT-_width
sub eax, _width
mov SrcLineStep, eax
mov eax, sf_LineWidth ; DstWidth = sf_LineWidth*nf_fqty;
mul ebx ;nf_fqty
mov DstWidth, eax
imul eax, (SHEIGHT-1) ; DstWidth7 = DstWidth * (SHEIGHT-1)
mov DstWidth7, eax
;Note: DstLineStep1+DstLineStep2 = DstWidth*SHEIGHT - _width
dec eax ; DstLineStep1 = DstWidth*(SHEIGHT-1)-1
mov DstLineStep1, eax
mov eax, DstWidth ; DstLineStep2 = DstWidth-_width+1
sub eax, _width
inc eax
mov DstLineStep2, eax
mov eax, DstLineStep1 ; LineEnd = DstWidth*(SHEIGHT-1)+_width-1
add eax, _width
mov LineEnd, eax
; esi = buf (pointer into buf)
; ebx = pointer into chgs
; dx = current chgs word
; edi = pointer into screen
; ecx = remaining square lines to copy
.if prvbuf ; buf = prvbuf ? nf_buf_prv : nf_buf_cur
mov esi, nf_buf_prv
.else
mov esi, nf_buf_cur
.endif
mov eax, y ; + y*SHEIGHT*nf_WIDTH
shl eax, LOG2_SHEIGHT
mul nf_width
add esi, eax
mov eax, x ; + x*SWIDTH*HI_COLOR_SCALE
shl eax, LOG2_SWIDTH+HI_COLOR_FLAG
add esi, eax
; dstx must be a multiple of 4 because everything is done on 32-bit words
; and bank crossing checks don't check for a crossing within a word.
and dstx, NOT 3 ; dstx &= ~3
mov ebx, chgs
mov dx, 0
mov cl, nf_fqty
nxtfld: push ecx
push esi
mov ecx, h
push ebx
push edx
mov eax, sf_LineWidth
mul dsty
add eax, dstx
; bank = start / WinGran
; dst = (start % WinGran) + sf_WriteWinPtr
mov edx, 0
div sf_WinGran
mov bank, eax
mov edi, edx
add edi, sf_WriteWinPtr
; Select new bank
cmp sf_SetBank, 0
jz nobank
mov bh, 0
mov bl, byte ptr sf_WriteWin
mov edx, bank
call sf_SetBank
; eax/edx destroyed by sf_SetBank
nobank: pop edx
pop ebx
NextLine:
push ecx
mov eax, edi
add eax, LineEnd ; (SHEIGHT-1)*DstWidth+_width-1
sub eax, sf_WriteWinLimit
jb NoSplit
jmp Split
LineDone:
pop ecx
add esi, SrcLineStep ; Move back to start column, down SHEIGHT
add edi, DstLineStep1 ; First advance to last byte
add edi, DstLineStep2 ; Then advance to new start
loop NextLine
pop esi
pop ecx
add esi, nf_width
inc dsty
dec cl
jnz nxtfld
jmp Finished
; --- Copy full squares ---
; Scan over contiguous unchanged squares, up to the per-line maximum.
; For each unchanged square, add SWIDTH*HI_COLOR_SCALE to esi and edi.
; Count contiguous changed squares, up to the per-line maximum:
; eax starts at 0 and gains SWIDTH*HI_COLOR_SCALE/4 (dwords) per changed
; square; fCopy then derives the source/dest line steps (ebx/edx) from eax.
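; In outline: dx holds the current 16-bit word from chgs, one bit per
; square, consumed high bit first via "add dx, dx"; carry set marks a
; changed square, and a zero result fetches the next word from chgs.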
NoSplit:
mov ecx, w
mov eax, 0
jmp fTest1
fNext1: mov dx, [ebx]
add ebx, 2
fTest1: add dx, dx
jz fNext1
jb fChgd2
add esi, SWIDTH*HI_COLOR_SCALE
add edi, SWIDTH*HI_COLOR_SCALE
loop fTest1
jmp LineDone
fNext2: mov dx, word ptr [ebx]
add ebx, 2
fTest2: add dx, dx
ja fCopy3
jz fNext2
fChgd2: add eax, SWIDTH*HI_COLOR_SCALE/4
loop fTest2
call fCopy
jmp LineDone
fCopy3: call fCopy
add esi, SWIDTH*HI_COLOR_SCALE
add edi, SWIDTH*HI_COLOR_SCALE
mov eax, 0
loop fTest1
jmp LineDone
fCopy: push ebx
push ecx
push edx
mov ecx, eax
shl ecx, 2
mov ebx, SrcWidth
sub ebx, ecx
mov edx, DstWidth
sub edx, ecx
REPEAT 7
mov ecx, eax
rep movsd
add esi, ebx
add edi, edx
ENDM
mov ecx, eax
rep movsd
sub esi, SrcWidth7
sub edi, DstWidth7
pop edx
pop ecx
pop ebx
retn
; --- Copy squares across bank boundary ---
; (occurs infrequently, but should be streamlined as much as possible
; because it could potentially be much more expensive than normal
; operation).
; HMMM... 16*640 = 10240 = approx 1/6 64K, so for 640x480,
; roughly 1 in 6 square lines will need special processing
; (actually, 2 in 12 due to interlacing, but that's the same ratio).
; Repeat above twice, once for end of cur bank, once for start
; of next bank, with following modifications:
; For end of cur bank...
; if di>=WinSize, we're done with squares (but need to correctly
; adjust si and di!)
; if di+4*cx>WinSize, we need to clip and then we're done
; (but need to correctly adjust si and di!)
; Reduce cx to (WinSize-di)/4.
; limit=WriteWinLimit-4*ax
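; In outline, each square row that straddles the bank window is copied in
; two passes: aCopy clips every "rep movsd" against the end of the current
; window (limit = sf_WriteWinLimit - 4*eax), then the bank is advanced and
; bCopy resumes from the start of the next window, skipping the rows that
; already landed in the previous bank (edi is tracked relative to
; sf_WriteWinPtr, so a negative offset means "still in the old bank").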
Split: push ebx
push edx
push esi
push edi
mov ecx, w
mov eax, 0
jmp aTest1
aNext1: mov dx, word ptr [ebx]
add ebx, 2
aTest1: add dx, dx
jz aNext1
jb aChgd2
add esi, SWIDTH*HI_COLOR_SCALE
add edi, SWIDTH*HI_COLOR_SCALE
loop aTest1
jmp aDone
aNext2: mov dx, [ebx]
add ebx, 2
aTest2: add dx, dx
ja aCopy3
jz aNext2
aChgd2: add eax, SWIDTH*HI_COLOR_SCALE/4
loop aTest2
call aCopy
jmp aDone
aCopy3: call aCopy
add esi, SWIDTH*HI_COLOR_SCALE
add edi, SWIDTH*HI_COLOR_SCALE
mov eax, 0
loop aTest1
jmp aDone
aCopy: push ebx
push ecx
push edx
push esi
push edi
mov ecx, eax
shl ecx, 2
mov ebx, SrcWidth
sub ebx, ecx
mov edx, DstWidth
sub edx, ecx
sub ecx, sf_WriteWinLimit
neg ecx
mov limit, ecx
REPEAT 7
cmp edi, limit
jns aFinal
mov ecx, eax
rep movsd
add esi, ebx
add edi, edx
ENDM
cmp edi, limit
jns aFinal
mov ecx, eax
jmp aLast
aFinal: mov ecx, sf_WriteWinLimit
sub ecx, edi
js aCpyDn
shr ecx, 2
aLast: rep movsd
aCpyDn: pop edi
pop esi
mov ecx, eax
shl ecx, 2
add esi, ecx
add edi, ecx
pop edx
pop ecx
pop ebx
retn
aDone: pop edi
pop esi
; Advance bank
mov eax, sf_WinGranPerSize
add bank, eax
sub edi, sf_WinSize
; Select new bank
cmp sf_SetBank, 0
jz nobank2
mov bh, 0
mov bl, byte ptr sf_WriteWin
mov edx, bank
call sf_SetBank
; eax/edx destroyed by SetBank
nobank2:
pop edx
pop ebx
; For start of next bank...
; While di+4*cx<=0, advance si & di by src/dst line step instead of
; doing rep mov
; If di<0, cx += di/4, si-=di, di=0
; Do remaining rep mov's (first with modified args, remainder with
; full args).
; Init bx/dx to src/dst line steps.
; limit=-4*ax
mov ecx, w
mov eax, 0
jmp bTest1
bNext1: mov dx, [ebx]
add ebx, 2
bTest1: add dx, dx
jz bNext1
jb bChgd2
add esi, SWIDTH*HI_COLOR_SCALE
add edi, SWIDTH*HI_COLOR_SCALE
loop bTest1
jmp LineDone
bNext2: mov dx, [ebx]
add ebx, 2
bTest2: add dx, dx
ja bCopy3
jz bNext2
bChgd2: add eax, SWIDTH*HI_COLOR_SCALE/4
loop bTest2
call bCopy
jmp LineDone
bCopy3: call bCopy
add esi, SWIDTH*HI_COLOR_SCALE
add edi, SWIDTH*HI_COLOR_SCALE
mov eax, 0
loop bTest1
jmp LineDone
bCopy: push ebx
push ecx
push edx
mov ecx, eax
shl ecx, 2
neg ecx
mov limit, ecx
mov ebx, SrcWidth
mov edx, DstWidth
sub edi, sf_WriteWinPtr
FOR bMovN, <bMov1,bMov2,bMov3,bMov4,bMov5,bMov6,bMov7>
mov ecx, offset bMovN
jns bFull
cmp limit, edi
js bPart
add esi, ebx
add edi, edx
ENDM
mov ecx, offset bMov8
jns bFull
cmp limit, edi
js bPart
add edi, sf_WriteWinPtr
shl eax, 2
add esi, eax
add edi, eax
jmp bCpyDn
bFull: push ecx
mov ecx, eax
add ebx, limit
add edx, limit
add edi, sf_WriteWinPtr
retn
bPart: push ecx
mov ecx, eax
sub esi, edi
sar edi, 2
add ecx, edi
mov edi, sf_WriteWinPtr
add ebx, limit
add edx, limit
retn
FOR bMovN, <bMov1,bMov2,bMov3,bMov4,bMov5,bMov6,bMov7>
bMovN: rep movsd
mov ecx, eax
add esi, ebx
add edi, edx
ENDM
bMov8: rep movsd
bCpyDn: sub esi, SrcWidth7
sub edi, DstWidth7
pop edx
pop ecx
pop ebx
retn
Finished:
ret
ENDM ; SHOW_FRAME_CHG_BODY
;void
;mve_sfShowFrameChg(
; bool prvbuf,
; unsigned x, unsigned y, unsigned w, unsigned h,
; unsigned short *chgs,
; unsigned dstx, unsigned dsty)
;
mve_sfShowFrameChg PROC USES ESI EDI EBX, \
prvbuf:DWORD, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD, \
chgs:PTRWORD, \
dstx:DWORD, dsty:DWORD
LOCAL _width:DWORD
LOCAL SrcWidth:DWORD
LOCAL DstWidth:DWORD
LOCAL SrcWidth7:DWORD
LOCAL DstWidth7:DWORD
LOCAL SrcLineStep:DWORD
LOCAL DstLineStep1:DWORD
LOCAL DstLineStep2:DWORD
LOCAL LineEnd:DWORD
LOCAL bank:DWORD
LOCAL limit:DWORD
SHOW_FRAME_CHG_BODY 0 ; Not HiColor
mve_sfShowFrameChg ENDP
if HICOLOR
;void
;mve_sfHiColorShowFrameChg(
; bool prvbuf,
; unsigned x, unsigned y, unsigned w, unsigned h,
; unsigned short *chgs,
; unsigned dstx, unsigned dsty)
;
mve_sfHiColorShowFrameChg PROC USES ESI EDI EBX, \
prvbuf:DWORD, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD, \
chgs:PTRWORD, \
dstx:DWORD, dsty:DWORD
LOCAL _width:DWORD
LOCAL SrcWidth:DWORD
LOCAL DstWidth:DWORD
LOCAL SrcWidth7:DWORD
LOCAL DstWidth7:DWORD
LOCAL SrcLineStep:DWORD
LOCAL DstLineStep1:DWORD
LOCAL DstLineStep2:DWORD
LOCAL LineEnd:DWORD
LOCAL bank:DWORD
LOCAL limit:DWORD
SHOW_FRAME_CHG_BODY 1 ; HiColor
mve_sfHiColorShowFrameChg ENDP
endif ;HICOLOR
endif ;PARTIAL
;----------------------------------------------------------------------
if 0 ; Not supported
if PKDATA
PK_SHOW_FRAME_CHG_BODY MACRO HI_COLOR_FLAG:REQ
LOCAL HI_COLOR_SCALE
HI_COLOR_SCALE equ HI_COLOR_FLAG+1
mov eax, w ; _width = w*SWIDTH*HI_COLOR_SCALE;
shl eax, LOG2_SWIDTH+HI_COLOR_FLAG
mov _width, eax
xor ebx, ebx ; ebx = nf_fqty (converted to 32-bits)
mov bl, nf_fqty
mov eax, nf_width ; SrcWidth = nf_width*nf_fqty;
mul ebx ;nf_fqty
mov SrcWidth, eax
imul eax, (SHEIGHT-1) ; SrcWidth7 = SrcWidth * (SHEIGHT-1)
mov SrcWidth7, eax
add eax, SrcWidth ; SrcLineStep = SrcWidth*SHEIGHT-_width
sub eax, _width
mov SrcLineStep, eax
mov eax, sf_LineWidth ; DstWidth = sf_LineWidth*nf_fqty;
mul ebx ;nf_fqty
mov DstWidth, eax
imul eax, (SHEIGHT-1) ; DstWidth7 = DstWidth * (SHEIGHT-1)
mov DstWidth7, eax
;Note: DstLineStep1+DstLineStep2 = DstWidth*SHEIGHT - _width
dec eax ; DstLineStep1 = DstWidth*(SHEIGHT-1)-1
mov DstLineStep1, eax
mov eax, DstWidth ; DstLineStep2 = DstWidth-_width+1
sub eax, _width
inc eax
mov DstLineStep2, eax
mov eax, DstLineStep1 ; LineEnd = DstWidth*(SHEIGHT-1)+_width-1
add eax, _width
mov LineEnd, eax
; esi = buf (pointer into buf)
; ebx = pointer into ops
; dx = temp for current op. dl xor dh keeps just upper nibble op.
; edi = pointer into screen
; ecx = remaining square lines to copy
.if prvbuf ; buf = prvbuf ? nf_buf_prv : nf_buf_cur
mov esi, nf_buf_prv
.else
mov esi, nf_buf_cur
.endif
mov eax, y ; + y*SHEIGHT*nf_WIDTH
shl eax, LOG2_SHEIGHT
mul nf_width
add esi, eax
mov eax, x ; + x*SWIDTH*HI_COLOR_SCALE
shl eax, LOG2_SWIDTH+HI_COLOR_FLAG
add esi, eax
; dstx must be a multiple of 4 because everything is done on 32-bit words
; and bank crossing checks don't check for a crossing within a word.
and dstx, NOT 3 ; dstx &= ~3
mov ebx, ops
mov cl, nf_fqty
nxtfld: push ecx
push esi
mov ecx, h
push ebx
mov eax, sf_LineWidth
mul dsty
add eax, dstx
; bank = start / WinGran
; dst = (start % WinGran) + sf_WriteWinPtr
mov edx, 0
div sf_WinGran
mov bank, eax
mov edi, edx
add edi, sf_WriteWinPtr
; Select new bank
cmp sf_SetBank, 0
jz nobank
mov bh, 0
mov bl, byte ptr sf_WriteWin
mov edx, bank
call sf_SetBank
; eax/edx destroyed by sf_SetBank
nobank: pop ebx
NextLine:
push ecx
mov eax, edi
add eax, LineEnd ; (SHEIGHT-1)*DstWidth+_width-1
sub eax, sf_WriteWinLimit
jb NoSplit
jmp Split
LineDone:
pop ecx
add esi, SrcLineStep ; Move back to start column, down SHEIGHT
add edi, DstLineStep1 ; First advance to last byte
add edi, DstLineStep2 ; Then advance to new start
loop NextLine
pop esi
pop ecx
add esi, nf_width
inc dsty
dec cl
jnz nxtfld
jmp Finished
; --- Copy full squares ---
; Scan over contiguous unchanged squares up to max per line
; For each unchanged square, add 8 (SWIDTH) to esi and edi.
; count # of contiguous changed squares up to max per line
; Init eax to 0, ebx and edx to line steps for source and dest.
; For each square, add 2 (SWIDTH/4) to eax and subtract 8 (SWIDTH)
; from ebx and edx.
NoSplit:
mov ecx, w
shr ecx, 1
mov eax, 0
fNext1: mov dl, [ebx]
inc ebx
mov dh, dl
and dh, 0Fh
jnz fChgd2a
fTest1a:add esi, SWIDTH*HI_COLOR_SCALE
add edi, SWIDTH*HI_COLOR_SCALE
xor dl, dh
jnz fChgd2b
fTest1b:add esi, SWIDTH*HI_COLOR_SCALE
add edi, SWIDTH*HI_COLOR_SCALE
dec ecx
jnz fNext1
jmp LineDone
fNext2: mov dl, [ebx]
inc ebx
mov dh, dl
and dh, 0Fh
jz fCopy3a
fChgd2a:add eax, SWIDTH*HI_COLOR_SCALE/4
xor dl, dh
jz fCopy3b
fChgd2b:add eax, SWIDTH*HI_COLOR_SCALE/4
dec ecx
jnz fNext2
call fCopy
jmp LineDone
fCopy3a:call fCopy
xor eax, eax
jmp fTest1a
fCopy3b:call fCopy
xor eax, eax
jmp fTest1b
fCopy: push ebx
push ecx
push edx
mov ecx, eax
shl ecx, 2
mov ebx, SrcWidth
sub ebx, ecx
mov edx, DstWidth
sub edx, ecx
REPEAT 7
mov ecx, eax
rep movsd
add esi, ebx
add edi, edx
ENDM
mov ecx, eax
rep movsd
sub esi, SrcWidth7
sub edi, DstWidth7
pop edx
pop ecx
pop ebx
retn
; --- Copy squares across bank boundary ---
; (occurs infrequently, but should be streamlined as much as possible
; because it could potentially be much more expensive than normal
; operation).
; HMMM... 16*640 = 10240 = approx 1/6 64K, so for 640x480,
; roughly 1 in 6 square lines will need special processing
; (actually, 2 in 12 due to interlacing, but that's the same ratio).
; Repeat above twice, once for end of cur bank, once for start
; of next bank, with following modifications:
; For end of cur bank...
; if di>=WinSize, we're done with squares (but need to correctly
; adjust si and di!)
; if di+4*cx>WinSize, we need to clip and then we're done
; (but need to correctly adjust si and di!)
; Reduce cx to (WinSize-di)/4.
; limit=WriteWinLimit-4*ax
Split: push ebx
push esi
push edi
mov ecx, w
shr ecx, 1
mov eax, 0
aNext1: mov dl, [ebx]
inc ebx
mov dh, dl
and dh, 0Fh
jnz aChgd2a
aTest1a:add esi, SWIDTH*HI_COLOR_SCALE
add edi, SWIDTH*HI_COLOR_SCALE
xor dl, dh
jnz aChgd2b
aTest1b:add esi, SWIDTH*HI_COLOR_SCALE
add edi, SWIDTH*HI_COLOR_SCALE
dec ecx
jnz aNext1
jmp LineDone
aNext2: mov dl, [ebx]
inc ebx
mov dh, dl
and dh, 0Fh
jz aCopy3a
aChgd2a:add eax, SWIDTH*HI_COLOR_SCALE/4
xor dl, dh
jz aCopy3b
aChgd2b:add eax, SWIDTH*HI_COLOR_SCALE/4
dec ecx
jnz aNext2
call aCopy
jmp LineDone
aCopy3a:call aCopy
xor eax, eax
jmp aTest1a
aCopy3b:call aCopy
xor eax, eax
jmp aTest1b
aCopy: push ebx
push ecx
push edx
push esi
push edi
mov ecx, eax
shl ecx, 2
mov ebx, SrcWidth
sub ebx, ecx
mov edx, DstWidth
sub edx, ecx
sub ecx, sf_WriteWinLimit
neg ecx
mov limit, ecx
REPEAT 7
cmp edi, limit
jns aFinal
mov ecx, eax
rep movsd
add esi, ebx
add edi, edx
ENDM
cmp edi, limit
jns aFinal
mov ecx, eax
jmp aLast
aFinal: mov ecx, sf_WriteWinLimit
sub ecx, edi
js aCpyDn
shr ecx, 2
aLast: rep movsd
aCpyDn: pop edi
pop esi
mov ecx, eax
shl ecx, 2
add esi, ecx
add edi, ecx
pop edx
pop ecx
pop ebx
retn
aDone: pop edi
pop esi
; Advance bank
mov eax, sf_WinGranPerSize
add bank, eax
sub edi, sf_WinSize
; Select new bank
cmp sf_SetBank, 0
jz nobank2
mov bh, 0
mov bl, byte ptr sf_WriteWin
mov edx, bank
call sf_SetBank
; eax/edx destroyed by SetBank
nobank2:
pop ebx
; For start of next bank...
; While di+4*cx<=0, advance si & di by src/dst line step instead of
; doing rep mov
; If di<0, cx += di/4, si-=di, di=0
; Do remaining rep mov's (first with modified args, remainder with
; full args).
; Init bx/dx to src/dst line steps.
; limit=-4*ax
mov ecx, w
shr ecx, 1
mov eax, 0
bNext1: mov dl, [ebx]
inc ebx
mov dh, dl
and dh, 0Fh
jnz bChgd2a
bTest1a:add esi, SWIDTH*HI_COLOR_SCALE
add edi, SWIDTH*HI_COLOR_SCALE
xor dl, dh
jnz bChgd2b
bTest1b:add esi, SWIDTH*HI_COLOR_SCALE
add edi, SWIDTH*HI_COLOR_SCALE
dec ecx
jnz bNext1
jmp LineDone
bNext2: mov dl, [ebx]
inc ebx
mov dh, dl
and dh, 0Fh
jz bCopy3a
bChgd2a:add eax, SWIDTH*HI_COLOR_SCALE/4
xor dl, dh
jz bCopy3b
bChgd2b:add eax, SWIDTH*HI_COLOR_SCALE/4
dec ecx
jnz bNext2
call bCopy
jmp LineDone
bCopy3a:call bCopy
xor eax, eax
jmp bTest1a
bCopy3b:call bCopy
xor eax, eax
jmp bTest1b
bCopy: push ebx
push ecx
push edx
mov ecx, eax
shl ecx, 2
neg ecx
mov limit, ecx
mov ebx, SrcWidth
mov edx, DstWidth
sub edi, sf_WriteWinPtr
FOR bMovN, <bMov1,bMov2,bMov3,bMov4,bMov5,bMov6,bMov7>
mov ecx, offset bMovN
jns bFull
cmp limit, edi
js bPart
add esi, ebx
add edi, edx
ENDM
mov ecx, offset bMov8
jns bFull
cmp limit, edi
js bPart
add edi, sf_WriteWinPtr
shl eax, 2
add esi, eax
add edi, eax
jmp bCpyDn
bFull: push ecx
mov ecx, eax
add ebx, limit
add edx, limit
add edi, sf_WriteWinPtr
retn
bPart: push ecx
mov ecx, eax
sub esi, edi
sar edi, 2
add ecx, edi
mov edi, sf_WriteWinPtr
add ebx, limit
add edx, limit
retn
FOR bMovN, <bMov1,bMov2,bMov3,bMov4,bMov5,bMov6,bMov7>
bMovN: rep movsd
mov ecx, eax
add esi, ebx
add edi, edx
ENDM
bMov8: rep movsd
bCpyDn: sub esi, SrcWidth7
sub edi, DstWidth7
pop edx
pop ecx
pop ebx
retn
Finished:
ret
ENDM ; PK_SHOW_FRAME_CHG_BODY
;void
;mve_sfPkShowFrameChg(
; bool prvbuf,
; unsigned x, unsigned y, unsigned w, unsigned h,
; unsigned char *ops,
; unsigned dstx, unsigned dsty)
;
mve_sfPkShowFrameChg PROC USES ESI EDI EBX, \
prvbuf:DWORD, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD, \
ops:PTRBYTE, \
dstx:DWORD, dsty:DWORD
LOCAL _width:DWORD
LOCAL SrcWidth:DWORD
LOCAL DstWidth:DWORD
LOCAL SrcWidth7:DWORD
LOCAL DstWidth7:DWORD
LOCAL SrcLineStep:DWORD
LOCAL DstLineStep1:DWORD
LOCAL DstLineStep2:DWORD
LOCAL LineEnd:DWORD
LOCAL bank:DWORD
LOCAL limit:DWORD
PK_SHOW_FRAME_CHG_BODY 0 ; Not HiColor
mve_sfPkShowFrameChg ENDP
if HICOLOR
;void
;mve_sfPkHiColorShowFrameChg(
; bool prvbuf,
; unsigned x, unsigned y, unsigned w, unsigned h,
; unsigned char *ops,
; unsigned dstx, unsigned dsty)
;
mve_sfPkHiColorShowFrameChg PROC USES ESI EDI EBX, \
prvbuf:DWORD, \
x:DWORD, y:DWORD, w:DWORD, h:DWORD, \
ops:PTRBYTE, \
dstx:DWORD, dsty:DWORD
LOCAL _width:DWORD
LOCAL SrcWidth:DWORD
LOCAL DstWidth:DWORD
LOCAL SrcWidth7:DWORD
LOCAL DstWidth7:DWORD
LOCAL SrcLineStep:DWORD
LOCAL DstLineStep1:DWORD
LOCAL DstLineStep2:DWORD
LOCAL LineEnd:DWORD
LOCAL bank:DWORD
LOCAL limit:DWORD
PK_SHOW_FRAME_CHG_BODY 1 ; HiColor
mve_sfPkHiColorShowFrameChg ENDP
endif ;HICOLOR
endif ;PKDATA
endif ; if 0 (not supported)
;---------------------------------------------------------------------
; Palette Management
;---------------------
;void __cdecl
;MVE_SetPalette(unsigned char *p, unsigned start, unsigned count)
;
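; Roughly equivalent C (illustrative only; "outportb" is just shorthand
; for an I/O port write, not a function used by this source):
;   if (start >= 256) return;
;   if (start + count > 256) count = 256 - start;
;   p += start * 3;
;   outportb(0x3C8, start);              // VGA DAC write index
;   for (i = 0; i < count * 3; ++i)
;     outportb(0x3C9, p[i]);             // DAC data, 3 bytes per entry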
MVE_SetPalette PROC USES ESI EBX, \
p:PTRBYTE, start:DWORD, count:DWORD
mov eax, start
mov ecx, count
mov esi, p
.if eax>=256 ; if (start>=256) return;
ret
.endif
lea ebx, [eax+ecx] ; if (start+count>256)
.if ebx>256
mov ecx, 256 ; count = 256-start
sub ecx, eax
.endif
add esi, eax ; p += start*3
add esi, eax
add esi, eax
lea ecx, [ecx+2*ecx] ; count *= 3
mov edx, 03c8h ; DAC Write Index Register
out dx, al ; Init write index to start
inc edx ; DAC Data Register
rep outsb
ret
MVE_SetPalette ENDP
; If at least 11 palette entries aren't changed, this is more compact
; than uncompressed 256 entry palette.
;
;static void palLoadCompPalette(unsigned char *buf)
;
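; Format handled below, roughly (illustrative C; not from the original):
;   // buf holds 32 mask bytes, one per group of 8 palette entries.
;   // A set bit means 3 bytes (r,g,b) follow for that entry; a clear bit
;   // leaves the corresponding pal_tbl entry untouched.
;   for (group = 0; group < 32; ++group) {
;     unsigned mask = *buf++;
;     for (bit = 0; bit < 8; ++bit, dst += 3)
;       if (mask & (1u << bit)) { dst[0]=*buf++; dst[1]=*buf++; dst[2]=*buf++; }
;   }
;   // Break-even vs. 768 raw bytes: 32 + 3*changed < 768 once at least
;   // 11 entries are unchanged.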
palLoadCompPalette PROC USES ESI EDI, \
buf: PTRBYTE
mov ax, ds ; Ensure es==ds for Symantec flat mode
mov es, ax
mov cx, 32
mov esi, buf
mov edi, offset pal_tbl
next: lodsb
or al, al
jnz chk0
add edi, 24
loop next
jmp done
chk0: test al, 1
jz not0
movsw
movsb
test al, 2
jz not1
cpy1: movsw
movsb
test al, 4
jz not2
cpy2: movsw
movsb
test al, 8
jz not3
cpy3: movsw
movsb
test al, 16
jz not4
cpy4: movsw
movsb
test al, 32
jz not5
cpy5: movsw
movsb
test al, 64
jz not6
cpy6: movsw
movsb
or al, al
jns not7
cpy7: movsw
movsb
loop next
jmp done
not0: add edi, 3
test al, 2
jnz cpy1
not1: add edi, 3
test al, 4
jnz cpy2
not2: add edi, 3
test al, 8
jnz cpy3
not3: add edi, 3
test al, 16
jnz cpy4
not4: add edi, 3
test al, 32
jnz cpy5
not5: add edi, 3
test al, 64
jnz cpy6
not6: add edi, 3
or al, al
js cpy7
not7: add edi, 3
loop next
done: ret
palLoadCompPalette ENDP
;-----------------------------------------------------------------------
; Graphics
;----------
gfxMode proc USES EBP ESI EDI EBX, mode:DWORD
mov eax, mode
int 10h
ret
gfxMode endp
gfxLoadCrtc proc USES ESI EDI EBX, crtc:PTRBYTE, chain4:BYTE, res:BYTE
mov edx, 03c4h ; alter sequence registers
mov al, 04h ; disable or enable chain 4 in memory mode
mov ah, chain4
out dx, ax
mov dx, 03dah ; General Input State #1 register
l1: in al, dx ; Loop until vertical retrace is off
test al, 8
jnz l1
l2: in al, dx ; Now loop until it's back on
test al, 8
jz l2
cli ; turn off all interrupts
mov edx, 03c4h ; Sequencer Synchronous reset
mov eax, 0100h ; Set sequencer reset
out dx, ax
mov edx, 03c2h ; Misc Output Register
mov al, res ; 25/28 MHz, 350/400/480 lines
out dx, al
mov edx, 03c4h ; Sequencer Synchronous reset
mov eax, 0300h ; Clear sequencer reset
out dx, ax
mov edx, 03d4h ; 6845 CRTC
mov esi, crtc ; tweaked values for CRTC registers
mov al, 011h ; deprotect CRTC registers 0-7
mov ah, [esi+011h]
and ah, 07Fh
out dx, ax
mov ecx, 018h ; Update CRTC registers with tweaked values
mov ebx, 0
l3: mov al, bl
mov ah, [esi+ebx]
out dx, ax
inc bl
loop l3
sti ; restore interrupts
ret
gfxLoadCrtc endp
; void __cdecl gfxGetCrtc(unsigned char *crtc);
;
gfxGetCrtc proc USES ESI EBX, crtc:PTRBYTE
mov edx, 03d4h ; 6845 CRTC
mov esi, crtc
mov ecx, 018h
mov ebx, 0
l3: mov al, bl
out dx, al
inc dx
in al, dx
dec dx
mov [esi+ebx], al
inc bl
loop l3
ret
gfxGetCrtc endp
; void __cdecl gfxVres(unsigned char misc, unsigned char *crtc);
; misc is one of the following:
; 350: 0x23 | 0x80 (2)
; 400: 0x23 | 0x40 (1)
; 480: 0x23 | 0xc0 (3)
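; (The two high bits of the Misc Output Register are the sync-polarity
; field, which is what selects 350/400/480-line vertical timing; the
; values in parentheses above are that field shifted down to bits 1:0.)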
; Get crtc register specified by crtc_addr into ah.
; To update register, do out dx,ax
GetCrtc MACRO crtc_addr
mov al, crtc_addr
out dx, al
inc dx
in al, dx
dec dx
mov ah, al
mov al, crtc_addr
ENDM
gfxVres PROC USES EBX, misc:BYTE, crtc:PTRBYTE
mov edx, 03dah ; General Input State #1 register
l1: in al, dx ; Loop until vertical retrace is off
test al, 8
jnz l1
l2: in al, dx ; Now loop until it's back on
test al, 8
jz l2
cli ; turn off all interrupts
mov edx, 03c4h ; Sequencer Synchronous reset
mov eax, 0100h ; Set sequencer reset
out dx, ax
mov edx, 03cch ; Misc Output Register (read port)
in al, dx
and al, 03fh ; Keep all but lines field
mov edx, 03c2h ; Misc Output Register (write port)
and misc, 0c0h ; Only keep lines field
or al, misc ; 350/400/480 lines
out dx, al
mov edx, 03c4h ; Sequencer Synchronous reset
mov eax, 0300h ; Clear sequencer reset
out dx, ax
mov edx, 03d4h ; CRTC address port
mov ebx, crtc ; Desired CRTC image
GetCrtc 011h ; Vertical Retrace End register
and ah, 07Fh ; Deprotect CRTC registers 0-7
out dx, ax
GetCrtc 03h ; End Horizontal Blanking register
or ah, 080h ; Enable CRTC registers 10-11
out dx, ax
mov al, 06h ; Vertical Total register
mov ah, byte ptr 06h[ebx]
out dx, ax
GetCrtc 07h ; Overflow register
and ah, 010h ; (Preserve LC)
or ah, byte ptr 07h[ebx]
out dx, ax
GetCrtc 09h ; Maximum Scan Line register
and ah, 040h ; (Preserve LC)
or ah, byte ptr 09h[ebx]
out dx, ax
mov al, 010h ; Vertical Retrace Start register
mov ah, byte ptr 010h[ebx]
out dx, ax
GetCrtc 11h ; Vertical Retrace End register
and ah, 070h ; (Preserve BW,DVI,CVI)
or ah, byte ptr 011h[ebx]
or ah, 080h ; Reprotect 0-7
out dx, ax
mov al, 012h ; Vertical Display End register
mov ah, byte ptr 012h[ebx]
out dx, ax
mov al, 015h ; Start Vertical Blank register
mov ah, byte ptr 015h[ebx]
out dx, ax
; Some SVGAs use a 7-bit vertical blank end, others 8-bit!
if 0
GetCrtc 16h ; End Vertical Blank register
and ah, 080h ; (Preserve reserved field)
or ah, byte ptr 016h[ebx]
out dx, ax
else
mov al, 16h ; End Vertical Blank register
mov ah, byte ptr 016h[ebx]
out dx, ax
endif
sti ; restore interrupts
ret
gfxVres ENDP
; void __cdecl MVE_gfxWaitRetrace(unsigned state);
;
MVE_gfxWaitRetrace proc state:DWORD
mov edx, 03dah ; Input Status #1 register
mov eax, state
or eax, eax
jnz wt1
wt0: in al, dx ; Wait for retrace off
and al, 8
jnz wt0
ret
wt1: in al, dx ; Wait for retrace on
and al, 8
jz wt1
ret
MVE_gfxWaitRetrace endp
; void __cdecl MVE_gfxSetSplit(unsigned line)
;
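; The VGA line-compare value is split across three CRTC registers: bits
; 7:0 go to the Line Compare register (index 18h), bit 8 to bit 4 of the
; Overflow register (index 07h), and bit 9 to bit 6 of the Maximum Scan
; Line register (index 09h).  The retrace wait below keeps the split from
; changing mid-frame.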
MVE_gfxSetSplit proc line:DWORD
mov edx, 03dah ; Input Status #1 register
wt0: in al, dx ; Wait for retrace off
and al, 8
jnz wt0
wt1: in al, dx ; Wait for retrace on
and al, 8
jz wt1
mov edx, 03d4h ; CRTC address port
mov ecx, line
shr ecx, 4
and cl, 010h
GetCrtc 07h ; Overflow Register
and ah, 0EFh ; LC8 (mask=10h)
or ah, cl
out dx, ax
mov ecx, line
shr ecx, 3
and cl, 040h
GetCrtc 09h ; Maximum Scan Line Register
and ah, 0BFh ; LC9 (mask=40h)
or ah, cl
out dx, ax
mov al, 18h ; Line Compare Register
mov ah, byte ptr line
out dx, ax
ret
MVE_gfxSetSplit endp
;----------------------------------------------------------------------
mveliba_end:
END