; Source: mirror of https://github.com/kevinbentley/Descent3.git
; (synced 2025-01-22 11:28:56 +00:00, commit 6c8977caf0).
; Commit message: The vast majority of this is fixing up `char *` that should be `const char *` but a handful of other fixes, like potential buffer overflows that GCC noticed, etc, were applied as well. This removes `-Wno-write-strings` from CMakeLists.txt, as it is no longer necessary, as there is no longer a flood of compiler warning spam when building. This does not fix all compiler warnings; there are still a handful, and they are legitimate, but they can be dealt with in a future commit.
; File size: 9636 lines, 170 KiB.
; Note: the mirror labels this file "NASM", but the syntax below
; (.MODEL, PROC USES, INVOKE, TYPEDEF) is MASM.
; .386
        .486                            ; .386 is all that is required; .486 was
                                        ; selected to get the 486 cycle timings
        .MODEL  FLAT, C

;;--- Types ---

PTRBYTE         TYPEDEF PTR BYTE
PTRWORD         TYPEDEF PTR WORD
PTRDWORD        TYPEDEF PTR DWORD
PTRPROC         TYPEDEF PTR PROC

;;--- Constants ---

; A "section" is the unit the decompressor works in.  Its pixel width and
; height are derived from their base-2 logarithms so that the two forms
; can never get out of step.
LOG2_SWIDTH     equ     3
LOG2_SHEIGHT    equ     3

SWIDTH          equ     1 SHL LOG2_SWIDTH       ; section width in pixels  (8)
SHEIGHT         equ     1 SHL LOG2_SHEIGHT      ; section height in pixels (8)
|
|
|
|
;;---
|
|
|
|
; Palette tables owned by the C side.
        EXTERN  pal_tbl:BYTE            ; unsigned char  pal_tbl[3*256];
        EXTERN  pal15_tbl:WORD          ; unsigned short pal15_tbl[256];

;; NextFrame working storage
;   MemRec nf_mem_buf1;
;   MemRec nf_mem_buf2;
        EXTERN  nf_buf_cur:PTRBYTE      ; unsigned char *nf_buf_cur;
        EXTERN  nf_buf_prv:PTRBYTE      ; unsigned char *nf_buf_prv;

;; NextFrame parameters
        EXTERN  nf_wqty:BYTE            ; unsigned char nf_wqty;  // (width/SWIDTH)
        EXTERN  nf_hqty:BYTE            ; unsigned char nf_hqty;  // (height/SHEIGHT)
        EXTERN  nf_fqty:BYTE            ; unsigned char nf_fqty;  // Number of fields
        EXTERN  nf_hicolor:DWORD        ; unsigned nf_hicolor;    // HiColor (0:none,1:normal,2:swapped)

;; Quantities derived from the parameters above
        EXTERN  nf_width:DWORD          ; unsigned nf_width;      // wqty * SWIDTH
        EXTERN  nf_height:DWORD         ; unsigned nf_height;     // hqty * SHEIGHT;
        EXTERN  nf_new_line:DWORD       ; unsigned nf_new_line;   // width - SWIDTH
        EXTERN  nf_new_row0:DWORD       ; unsigned nf_new_row0;   // SHEIGHT*width*2-width
        EXTERN  nf_back_right:DWORD     ; unsigned nf_back_right; // (SHEIGHT-1)*width

;; Frame parameters
;;   The part of the current frame that has been updated and therefore
;;   still has to be sent to the screen.
        EXTERN  nf_new_x:DWORD          ; unsigned nf_new_x;
        EXTERN  nf_new_y:DWORD          ; unsigned nf_new_y;
        EXTERN  nf_new_w:DWORD          ; unsigned nf_new_w;
        EXTERN  nf_new_h:DWORD          ; unsigned nf_new_h;
|
|
|
|
.data

; Copyright notice embedded in the data segment as one NUL-terminated
; string.
; NOTE(review): MASM does not translate C-style escapes, so each "\n"
; below is presumably stored as the two characters '\' and 'n' rather
; than a newline -- confirm before relying on the exact bytes.
        BYTE    "(c) 1997 Interplay Productions. All Rights Reserved.\n"
        BYTE    "This file is confidential and consists of proprietary information\n"
        BYTE    "of Interplay Productions. This file and associated libraries\n"
        BYTE    "may not, in whole or in part, be disclosed to third parties,\n"
        BYTE    "incorporated into any software product which is not being created\n"
        BYTE    "for Interplay Productions, copied or duplicated in any form,\n"
        BYTE    "without the prior written permission of Interplay Productions.\n"
        BYTE    "Further, you may not reverse engineer, decompile or otherwise\n"
        BYTE    "attempt to derive source code of this material.\n",0
|
|
|
|
.code
|
|
|
|
|
|
;-----------------------------------------------------------------------
; NF_DECOMP_INIT HI_COLOR_FLAG
;
; Shared set-up for the decompression procedures.  Must be expanded
; inside a PROC that provides
;       arguments  x, y, w, h           (rectangle, in section units)
;       locals     tbuf, new_row, DiffBufPtrs
; HI_COLOR_FLAG (0 or 1) is added to the shift applied to x and w.
;
; Writes: nf_new_x, nf_new_y, nf_new_w, nf_new_h, DiffBufPtrs, new_row,
;         tbuf, es
; Leaves: ebx = nf_fqty (zero-extended)
; Clobb:  eax, edx, flags
;-----------------------------------------------------------------------
NF_DECOMP_INIT MACRO HI_COLOR_FLAG: REQ

        mov     ax, ds                  ; Symantec flat mode: make sure es == ds
        mov     es, ax                  ; (needed by the movsd copies that follow)

        mov     eax, nf_buf_prv         ; DiffBufPtrs = nf_buf_prv - nf_buf_cur
        sub     eax, nf_buf_cur
        mov     DiffBufPtrs, eax

        movzx   ebx, nf_fqty            ; ebx = nf_fqty as a 32-bit value

        mov     eax, w                  ; nf_new_w = w*SWIDTH*2^HI_COLOR_FLAG
        shl     eax, LOG2_SWIDTH+HI_COLOR_FLAG
        mov     nf_new_w, eax

        mov     eax, nf_new_row0        ; new_row = nf_new_row0 - nf_new_w
        sub     eax, nf_new_w
        mov     new_row, eax

        mov     eax, x                  ; nf_new_x = x*SWIDTH*2^HI_COLOR_FLAG
        shl     eax, LOG2_SWIDTH+HI_COLOR_FLAG
        mov     nf_new_x, eax

        mov     eax, y                  ; nf_new_y = y*SHEIGHT*nf_fqty
        shl     eax, LOG2_SHEIGHT
        mul     ebx
        mov     nf_new_y, eax

        mov     eax, h                  ; nf_new_h = h*SHEIGHT*nf_fqty
        shl     eax, LOG2_SHEIGHT
        mul     ebx
        mov     nf_new_h, eax

        ;; tbuf = first byte of the rectangle inside the current buffer
        mov     eax, nf_buf_cur
        mov     tbuf, eax
        .if x || y                      ; offset is zero at the origin, so skip it
            mov     eax, nf_new_y       ; tbuf += nf_new_y*nf_width + nf_new_x
            mul     nf_width
            add     eax, nf_new_x
            add     tbuf, eax
        .endif

ENDM    ; NF_DECOMP_INIT
|
|
|
|
;-----------------------------------------------------------------------
; DECOMP_BODY HI_COLOR_FLAG
;
; Body shared by nfDecomp (flag 0) and nfHiColorDecomp (flag 1).
; Expects the enclosing PROC to supply arguments comp, x, y, w, h and
; locals tbuf, new_row, DiffBufPtrs, parms_sz.
;
; comp starts with one 16-bit parameter word per section
; (w*h*nf_fqty words), followed by the pixel data of the new sections.
;
;   Pass 1 (ns_*): parameter == 0        -> copy a new 8x8 section
;                                           from the data stream.
;   Pass 2 (ms_*): parameter >  0 signed -> copy a section from
;                                           elsewhere in the current
;                                           buffer;
;                  parameter <  0 signed -> copy a section from the
;                                           previous buffer;
;                  parameter == 0        -> nothing (done in pass 1).
;
; Only the low byte of w, h is used as a loop count.
; Clobb: eax, ebx, ecx, edx, esi, edi, es, flags
;-----------------------------------------------------------------------
DECOMP_BODY MACRO HI_COLOR_FLAG:REQ

        LOCAL   HI_COLOR_SCALE
HI_COLOR_SCALE  equ     HI_COLOR_FLAG+1         ;; scale factor (1 or 2) on widths/offsets
        NF_DECOMP_INIT HI_COLOR_FLAG

        mov     eax, w                  ; parms_sz = (w*h*nf_fqty) << 1
        mul     h
        mul     ebx                     ; ebx still holds nf_fqty from the init macro
        shl     eax, 1
        mov     parms_sz, eax

        ; Register roles for pass 1:
        ;   esi  read pointer into the new-section data inside comp
        ;   edi  write pointer into the current screen buffer
        ;   edx  nf_new_line, added after each copied row of a section
        ;   ebx  read pointer into the section parameters
        mov     edi, tbuf
        mov     edx, nf_new_line        ; width - SWIDTH
        mov     ebx, comp               ; parameters sit at the start of comp
        mov     esi, ebx
        add     esi, parms_sz           ; data starts after the parameter words

        ;---------------- pass 1: new sections ----------------
        mov     cl, nf_fqty             ; cl = fields left
ns_0f:  push    ecx
        push    edi
        mov     ch, byte ptr h          ; ch = section rows left
ns_0:   mov     cl, byte ptr w          ; cl = sections left in this row
ns_1:   cmp     word ptr [ebx], 0
        je      ns_10                   ; zero parameter: section comes from comp
        add     edi, SWIDTH*HI_COLOR_SCALE
ns_2:   add     ebx, 2
        dec     cl
        jnz     ns_1
        add     edi, new_row            ; SHEIGHT*width - SWIDTH*w
        dec     ch
        jnz     ns_0
        pop     edi
        pop     ecx
        add     edi, nf_width           ; next field starts one line further down
        dec     cl
        jnz     ns_0f
        jmp     ns_99

        ; Copy one new section.
        ;   In: esi -> source data, edi -> section on screen.
        ; Assumes SWIDTH=8 (16-bit data) and SHEIGHT=8.
ns_10:
        REPEAT 7
          REPEAT 2*HI_COLOR_SCALE
            movsd
          ENDM
          add   edi, edx
        ENDM
        REPEAT 2*HI_COLOR_SCALE
          movsd
        ENDM

        sub     edi, nf_back_right      ; (SHEIGHT-1)*width
        jmp     ns_2

ns_99:

        ;---------------- pass 2: moved sections ----------------
        ; Walk the parameters a second time, now acting on the non-zero
        ; ones.
        ;   ebx  read pointer into the parameters
        ;   esi  zero between sections; receives the parameter word and
        ;        is then turned into an absolute source address
        ;        (+-16K relative to edi).
        sub     ebx, parms_sz           ; rewind to the first parameter
        mov     edi, tbuf
        mov     cl, nf_fqty
        xor     esi, esi
ms_0f:  push    ecx
        push    edi
        mov     ch, byte ptr h
ms_0:   mov     cl, byte ptr w
ms_1:   or      si, [ebx]               ; si = parameter, flags reflect its sign
        jg      ms_10                   ; positive: source in current buffer
        jl      ms_j30                  ; negative: source in previous buffer
        add     edi, SWIDTH*HI_COLOR_SCALE
ms_2:   add     ebx, 2
        dec     cl
        jnz     ms_1
        add     edi, new_row            ; SHEIGHT*width - SWIDTH*w
        dec     ch
        jnz     ms_0
        pop     edi
        pop     ecx
        add     edi, nf_width
        dec     cl
        jnz     ms_0f
        jmp     ms_99

ms_j30: jmp     ms_30                   ; trampoline to ms_30

        ; Move one section within the current screen buffer.
        ;   In: edi -> destination section, esi = relative source offset.
        ; Assumes SWIDTH==8 and SHEIGHT==8.
ms_10:  ; turn esi into an absolute address
        lea     esi, [esi*HI_COLOR_SCALE-04000h*HI_COLOR_SCALE+edi]
        REPEAT 7
          REPEAT 2*HI_COLOR_SCALE
            movsd
          ENDM
          add   esi, edx
          add   edi, edx
        ENDM
        REPEAT 2*HI_COLOR_SCALE
          movsd
        ENDM

        sub     edi, nf_back_right      ; (SHEIGHT-1)*width
        xor     esi, esi                ; back to zero for the next "or si"
        jmp     ms_2

        ; Second copy of the pass-2 loop.  It is only ever entered at
        ; ms_22 (from ms_30), so the two pushes at ms_20f are balanced by
        ; the pops further down on later fields.
ms_20f: push    ecx
        push    edi
        mov     ch, byte ptr h
ms_20:  mov     cl, byte ptr w
ms_21:  or      si, [ebx]
        jl      ms_30
        jg      ms_j10
        add     edi, SWIDTH*HI_COLOR_SCALE
ms_22:  add     ebx, 2
        dec     cl
        jnz     ms_21
        add     edi, new_row            ; SHEIGHT*width - SWIDTH*w
        dec     ch
        jnz     ms_20
        pop     edi
        pop     ecx
        add     edi, nf_width
        dec     cl
        jnz     ms_20f
        jmp     ms_99

ms_j10: jmp     ms_10                   ; trampoline to ms_10

        ; Move one section from the previous screen buffer to the
        ; current one.
        ;   In: edi -> destination section, esi = relative source offset.
        ; Assumes SWIDTH==8 and SHEIGHT==8.
ms_30:  ; turn esi into an absolute address ...
        lea     esi, [esi*HI_COLOR_SCALE-0C000h*HI_COLOR_SCALE+edi]
        add     esi, DiffBufPtrs        ; ... inside the other buffer

        REPEAT 7
          REPEAT 2*HI_COLOR_SCALE
            movsd
          ENDM
          add   esi, edx
          add   edi, edx
        ENDM
        REPEAT 2*HI_COLOR_SCALE
          movsd
        ENDM

        sub     edi, nf_back_right      ; (SHEIGHT-1)*width
        xor     esi, esi                ; back to zero for the next "or si"
        jmp     ms_22

ms_99:
ENDM    ; DECOMP_BODY
|
|
|
|
; Non-HiColor versions
|
|
|
|
;-----------------------------------------------------------------------
; void nfDecomp(unsigned char *comp,
;               unsigned x, unsigned y, unsigned w, unsigned h)
;
; Decompress into the sub-rectangle of the current buffer given by
; x,y,w,h, all in units of SWIDTHxSHEIGHT (8x8) sections.
; When nf_hicolor is non-zero the work is handed to nfHiColorDecomp
; with the same arguments.
;-----------------------------------------------------------------------
nfDecomp PROC USES ESI EDI EBX, comp:PTRBYTE, x:DWORD, y:DWORD, w:DWORD, h:DWORD
        LOCAL   tbuf:PTRBYTE, new_row:DWORD, DiffBufPtrs:DWORD, parms_sz:DWORD

        cmp     nf_hicolor, 0
        je      nd_paletted
        INVOKE  nfHiColorDecomp, comp,x,y,w,h
        ret

nd_paletted:
        DECOMP_BODY 0                   ; Not HiColor

        ret
nfDecomp ENDP
|
|
|
|
;-----------------------------------------------------------------------
; void nfHiColorDecomp(unsigned char *comp,
;                      unsigned x, unsigned y, unsigned w, unsigned h)
;
; HiColor counterpart of nfDecomp: decompress into the sub-rectangle of
; the current buffer given by x,y,w,h, all in units of SWIDTHxSHEIGHT
; (8x8) sections.
;-----------------------------------------------------------------------
nfHiColorDecomp PROC USES ESI EDI EBX,comp:PTRBYTE,x:DWORD, y:DWORD, w:DWORD, h:DWORD
        LOCAL   tbuf:PTRBYTE, new_row:DWORD, DiffBufPtrs:DWORD, parms_sz:DWORD

        DECOMP_BODY 1                   ; HiColor

        ret
nfHiColorDecomp ENDP
|
|
|
|
;-----------------------------------------------------------------------
; DECOMP_CHG_BODY HI_COLOR_FLAG
;
; Body shared by nfDecompChg (flag 0) and nfHiColorDecompChg (flag 1).
; Expects the enclosing PROC to supply arguments chgs, parms, comp,
; x, y, w, h and locals tbuf, new_row, DiffBufPtrs, pChgs.
;
; Works like DECOMP_BODY, except that a bit stream read from chgs
; decides per section whether a parameter word exists at all.  ax holds
; the current chgs word and is shifted left once per section:
;       result zero              -> word used up, fetch the next one
;       carry clear, result != 0 -> section unchanged, no parameter
;       carry set,   result != 0 -> section has a parameter in parms
;
; Only the low byte of w, h is used as a loop count.
; Clobb: eax, ebx, ecx, edx, esi, edi, es, flags
;-----------------------------------------------------------------------
DECOMP_CHG_BODY MACRO HI_COLOR_FLAG:REQ

        LOCAL   HI_COLOR_SCALE
HI_COLOR_SCALE  equ     HI_COLOR_FLAG+1         ;; scale factor (1 or 2) on widths/offsets
        NF_DECOMP_INIT HI_COLOR_FLAG

        ; Register roles for pass 1:
        ;   esi  read pointer into comp (new-section data)
        ;   edi  write pointer into the current screen buffer
        ;   edx  nf_new_line, added after each copied row of a section
        ;   ebx  read pointer into the section parameters
        mov     edi, tbuf
        mov     edx, nf_new_line        ; width - SWIDTH
        mov     esi, comp
        mov     ebx, parms

        ;---------------- pass 1: new sections ----------------
        mov     eax, chgs               ; restart the change-bit stream
        mov     pChgs, eax
        xor     eax, eax                ; empty word forces a fetch at ns_1
        mov     cl, nf_fqty             ; cl = fields left
ns_0f:  push    ecx
        push    edi
        mov     ch, byte ptr h          ; ch = section rows left
ns_0:   mov     cl, byte ptr w          ; cl = sections left in this row
ns_1:   add     ax, ax                  ; shift next change bit into carry
        ja      ns_1b                   ; CF=0, ZF=0: section not changed
        jz      ns_5                    ; word exhausted: fetch another
        cmp     word ptr [ebx], 0
        je      ns_10                   ; zero parameter: section comes from comp
        add     ebx, 2
ns_1b:  add     edi, SWIDTH*HI_COLOR_SCALE
ns_2:   dec     cl
        jnz     ns_1
        add     edi, new_row            ; SHEIGHT*width - SWIDTH*w
        dec     ch
        jnz     ns_0
        pop     edi
        pop     ecx
        add     edi, nf_width           ; next field starts one line further down
        dec     cl
        jnz     ns_0f
        jmp     ns_99

ns_5:   mov     eax, pChgs              ; ax = *pChgs++ (only ax is used afterwards)
        add     pChgs, 2
        mov     ax, [eax]
        jmp     ns_1

        ; Copy one new section.
        ;   In: esi -> source data, edi -> section on screen.
        ; Assumes SWIDTH=8 (16-bit data) and SHEIGHT=8.
ns_10:
        REPEAT 7
          REPEAT 2*HI_COLOR_SCALE
            movsd
          ENDM
          add   edi, edx
        ENDM
        REPEAT 2*HI_COLOR_SCALE
          movsd
        ENDM

        sub     edi, nf_back_right      ; (SHEIGHT-1)*width
        add     ebx, 2
        jmp     ns_2

ns_99:

        ;---------------- pass 2: moved sections ----------------
        ; Walk change bits and parameters a second time, now acting on
        ; the non-zero parameters.
        ;   ebx  read pointer into the parameters
        ;   esi  zero between sections; receives the parameter word and
        ;        is then turned into an absolute source address
        ;        (+-16K relative to edi).
        mov     edi, tbuf
        mov     ebx, parms

        mov     eax, chgs               ; restart the change-bit stream
        mov     pChgs, eax
        xor     eax, eax                ; empty word forces a fetch at ms_1
        mov     cl, byte ptr nf_fqty
        xor     esi, esi
ms_0f:  push    ecx
        push    edi
        mov     ch, byte ptr h
ms_0:   mov     cl, byte ptr w
ms_1:   add     ax, ax                  ; shift next change bit into carry
        ja      ms_1b                   ; section not changed
        jz      ms_5                    ; word exhausted
        or      si, [ebx]               ; si = parameter, flags reflect its sign
        jg      ms_10                   ; positive: source in current buffer
        jl      ms_j30                  ; negative: source in previous buffer
        add     ebx, 2
ms_1b:  add     edi, SWIDTH*HI_COLOR_SCALE
ms_2:   dec     cl
        jnz     ms_1
        add     edi, new_row            ; SHEIGHT*width - SWIDTH*w
        dec     ch
        jnz     ms_0
        pop     edi
        pop     ecx
        add     edi, nf_width
        dec     cl
        jnz     ms_0f
        jmp     ms_99

ms_5:   mov     eax, pChgs              ; ax = *pChgs++
        add     pChgs, 2
        mov     ax, word ptr [eax]
        jmp     ms_1

ms_j30: jmp     ms_30                   ; trampoline to ms_30

        ; Move one section within the current screen buffer.
        ;   In: edi -> destination section, esi = relative source offset.
        ; Assumes SWIDTH==8 and SHEIGHT==8.
ms_10:  ; turn esi into an absolute address
        lea     esi, [esi*HI_COLOR_SCALE-04000h*HI_COLOR_SCALE+edi]
        REPEAT 7
          REPEAT 2*HI_COLOR_SCALE
            movsd
          ENDM
          add   esi, edx
          add   edi, edx
        ENDM
        REPEAT 2*HI_COLOR_SCALE
          movsd
        ENDM

        sub     edi, nf_back_right      ; (SHEIGHT-1)*width
        xor     esi, esi                ; back to zero for the next "or si"
        add     ebx, 2
        jmp     ms_2

        ; Second copy of the pass-2 loop.  It is only ever entered at
        ; ms_22 (from ms_30), so the two pushes at ms_20f are balanced by
        ; the pops further down on later fields.
ms_20f: push    ecx
        push    edi
        mov     ch, byte ptr h
ms_20:  mov     cl, byte ptr w
ms_21:  add     ax, ax
        ja      ms_21b
        jz      ms_25
        or      si, [ebx]
        jl      ms_30
        jg      ms_j10
        add     ebx, 2
ms_21b: add     edi, SWIDTH*HI_COLOR_SCALE
ms_22:  dec     cl
        jnz     ms_21
        add     edi, new_row            ; SHEIGHT*width - SWIDTH*w
        dec     ch
        jnz     ms_20
        pop     edi
        pop     ecx
        add     edi, nf_width
        dec     cl
        jnz     ms_20f
        jmp     ms_99

ms_25:  mov     eax, pChgs              ; ax = *pChgs++
        add     pChgs, 2
        mov     ax, [eax]
        jmp     ms_21

ms_j10: jmp     ms_10                   ; trampoline to ms_10

        ; Move one section from the previous screen buffer to the
        ; current one.
        ;   In: edi -> destination section, esi = relative source offset.
        ; Assumes SWIDTH==8 and SHEIGHT==8.
ms_30:  ; turn esi into an absolute address ...
        lea     esi, [esi*HI_COLOR_SCALE-0C000h*HI_COLOR_SCALE+edi]
        add     esi, DiffBufPtrs        ; ... inside the other buffer

        REPEAT 7
          REPEAT 2*HI_COLOR_SCALE
            movsd
          ENDM
          add   esi, edx
          add   edi, edx
        ENDM
        REPEAT 2*HI_COLOR_SCALE
          movsd
        ENDM

        sub     edi, nf_back_right      ; (SHEIGHT-1)*width
        add     ebx, 2
        xor     esi, esi                ; back to zero for the next "or si"
        jmp     ms_22

ms_99:
ENDM    ; DECOMP_CHG_BODY
|
|
|
|
|
|
;-----------------------------------------------------------------------
; void nfDecompChg(unsigned short *chgs,
;                  unsigned short *parms,
;                  unsigned char *comp,
;                  unsigned x, unsigned y, unsigned w, unsigned h)
;
; Decompress into the sub-rectangle of the current buffer given by
; x,y,w,h, all in units of SWIDTHxSHEIGHT (8x8) sections.
;   chgs   says which squares to update.
;   parms  holds the motion parameters of the squares to update.
; When nf_hicolor is non-zero the work is handed to nfHiColorDecompChg
; with the same arguments.
;-----------------------------------------------------------------------
nfDecompChg PROC USES ESI EDI EBX,chgs:PTRWORD, parms:PTRWORD,comp:PTRBYTE,x:DWORD, y:DWORD, w:DWORD, h:DWORD
        LOCAL   tbuf:PTRBYTE, new_row:DWORD, DiffBufPtrs:DWORD, pChgs:PTRBYTE

        cmp     nf_hicolor, 0
        je      ndc_paletted
        INVOKE  nfHiColorDecompChg, chgs,parms,comp,x,y,w,h
        ret

ndc_paletted:
        DECOMP_CHG_BODY 0               ; Not HiColor

        ret
nfDecompChg ENDP
|
|
|
|
;-----------------------------------------------------------------------
; void nfHiColorDecompChg(unsigned short *chgs,
;                         unsigned short *parms,
;                         unsigned char *comp,
;                         unsigned x, unsigned y, unsigned w, unsigned h)
;
; HiColor counterpart of nfDecompChg: decompress into the sub-rectangle
; of the current buffer given by x,y,w,h, all in units of
; SWIDTHxSHEIGHT (8x8) sections.
;   chgs   says which squares to update.
;   parms  holds the motion parameters of the squares to update.
;-----------------------------------------------------------------------
nfHiColorDecompChg PROC USES ESI EDI EBX,chgs:PTRWORD,parms:PTRWORD,comp:PTRBYTE,x:DWORD, y:DWORD, w:DWORD, h:DWORD
        LOCAL   tbuf:PTRBYTE, new_row:DWORD, DiffBufPtrs:DWORD, pChgs:PTRBYTE

        DECOMP_CHG_BODY 1               ; HiColor
        ret
nfHiColorDecompChg ENDP
|
|
|
|
|
|
.data

; Luminance table for palette entries.
lum_tbl         DWORD   256 DUP (0)

; (signed 8-bit y) * nf_width; filled in at run time by nfPkConfig.
nfpk_ShiftY     DWORD   256 DUP (0)

;-----------------------------------------------------------------------
; Constant tables
;-----------------------------------------------------------------------

; 256 (x,y) byte pairs covering -8:7 in both directions
; (8-bit -8:7 x nf_width + -8:7), x varying fastest.
nfpk_ShiftP1    LABEL   WORD
        FOR y, <-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7>
          FOR x, <-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7>
            BYTE x,y
          ENDM
        ENDM

; 256 (x,y) byte pairs:
;   as stored : 8-bit to right and below in roughly
;               0:14*nf_width + -14:14 (-3 cases)
;   negated   : 8-bit to left and above in roughly
;               -14:0*nf_width + -14:14 (-3 cases)
nfpk_ShiftP2    LABEL   WORD
        ; rows 0..7: only x = 8..14
        FOR y, <0,1,2,3,4,5,6,7>
          FOR x, <8,9,10,11,12,13,14>
            BYTE x,y
          ENDM
        ENDM
        ; rows 8..13: x = -14..14
        FOR y, <8,9,10,11,12,13>
          FOR x, <-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1>
            BYTE x,y
          ENDM
          FOR x, <0,1,2,3,4,5,6,7,8,9,10,11,12,13,14>
            BYTE x,y
          ENDM
        ENDM
        ; row 14: x = -14..11 (table is capped at 256 entries)
        FOR x, <-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1>
          BYTE x,14
        ENDM
        FOR x, <0,1,2,3,4,5,6,7,8,9,10,11>
          BYTE x,14
        ENDM

; 16 dword entries, one per combination of four two-way choices.
; Each byte is one of the register codes below.
nfpk_mov4l      LABEL   DWORD
; mov ax, bx,cx
MOV4L_REGS      TEXTEQU <!<0c0h+3,0c0h+1!>>
%FOR m4, MOV4L_REGS
%  FOR m3, MOV4L_REGS
%    FOR m2, MOV4L_REGS
%      FOR m1, MOV4L_REGS
        BYTE m2,m1,m4,m3
       ENDM
     ENDM
   ENDM
 ENDM

; 256 dword entries, one per combination of four four-way choices.
nfpk_mov8       LABEL   DWORD
; mov ax, bx/dx/cx/bp
MOV8_REGS       TEXTEQU <!<0c0h+3,0c0h+2,0c0h+1,0c0h+5!>>
%FOR m4, MOV8_REGS
%  FOR m3, MOV8_REGS
%    FOR m2, MOV8_REGS
%      FOR m1, MOV8_REGS
        BYTE m2,m1,m4,m3
       ENDM
     ENDM
   ENDM
 ENDM

; 256 dword entries; bytes alternate between the "al" and "ah" code sets.
nfpk_mov4       LABEL   DWORD
; mov al, bl/bh/cl/ch
MOV4_REGS0      TEXTEQU <!<0c0h+3,0c0h+7,0c0h+1,0c0h+5!>>
; mov ah, bl/bh/cl/ch
MOV4_REGS1      TEXTEQU <!<0e0h+3,0e0h+7,0e0h+1,0e0h+5!>>
%FOR m4, MOV4_REGS1
%  FOR m3, MOV4_REGS0
%    FOR m2, MOV4_REGS1
%      FOR m1, MOV4_REGS0
        BYTE m3,m4,m1,m2
       ENDM
     ENDM
   ENDM
 ENDM
|
|
|
|
.code
|
|
|
|
;-----------------------------------------------------------------------
; nfPkConfig
;
; Fills the screen-size dependent table used by nfPkDecomp.  Entry i of
; nfpk_ShiftY receives (i interpreted as a signed byte) * nf_width:
;       entries   0..127 :    0*nf_width ..  127*nf_width
;       entries 128..255 : -128*nf_width ..   -1*nf_width
;
; In:    nf_width
; Out:   nfpk_ShiftY[0..255]
; Clobb: eax, ecx, flags (esi, edi, ebx are preserved by USES)
;-----------------------------------------------------------------------
nfPkConfig PROC USES ESI EDI EBX

        mov     ebx, nf_width           ; ebx = step between adjacent entries
        lea     edi, nfpk_ShiftY        ; edi = write cursor

        ; Non-negative half: 0, w, 2w, ...
        xor     eax, eax
        mov     ecx, 128
lp1:    mov     [edi], eax
        add     eax, ebx
        add     edi, 4
        dec     ecx
        jne     lp1

        ; Negative half: start at -128*w and climb back towards zero.
        mov     eax, ebx
        neg     eax
        shl     eax, 7                  ; eax = -(nf_width << 7)
        mov     ecx, 128
lp2:    mov     [edi], eax
        add     eax, ebx
        add     edi, 4
        dec     ecx
        jne     lp2

        ret
nfPkConfig ENDP
|
|
|
|
|
|
|
|
; Screen layout.
        EXTERN  sf_LineWidth:DWORD      ; unsigned sf_LineWidth;  // Distance between lines in memory

; Banked screen parameters
        EXTERN  sf_SetBank:PTRPROC      ; unsigned long sf_SetBank;
        EXTERN  sf_WinGran:DWORD        ; unsigned sf_WinGran;
        EXTERN  sf_WinSize:DWORD        ; unsigned long sf_WinSize;
        EXTERN  sf_WinGranPerSize:DWORD ; unsigned sf_WinGranPerSize;
;{sf_WriteWinPtr and sf_WriteWinLimit replace sf_WriteWinSeg, see mveliba.asm}
        EXTERN  sf_WriteWinPtr:PTRBYTE  ; unsigned char *sf_WriteWinPtr;
        EXTERN  sf_WriteWinLimit:PTRBYTE ; unsigned char *WriteWinLimit;
        EXTERN  sf_WriteWin:DWORD       ; unsigned sf_WriteWin;

; Horizontal scaling options, read by mve_ShowFrameField.
        EXTERN  opt_hscale_step:DWORD
        EXTERN  opt_hscale_adj:DWORD
|
|
|
|
|
|
;-----------------------------------------------------------------------
; void mve_ShowFrameField(
;       unsigned char *buf, unsigned bufw, unsigned bufh,
;       unsigned sx, unsigned sy, unsigned w, unsigned h,
;       unsigned dstx, unsigned dsty, unsigned field)
;
; Copies a w x h rectangle that starts at (sx,sy) inside buf (bufw
; bytes per line) to the screen at (dstx+sx, dsty+sy).
;
;   sf_SetBank == 0 : the screen is addressed directly through
;                     sf_WriteWinPtr.
;   sf_SetBank != 0 : the screen is reached through a banked window;
;                     sf_SetBank is called (bh = 0, bl = sf_WriteWin,
;                     edx = bank) whenever a new bank is needed, and a
;                     line that straddles the window limit is copied in
;                     two halves.
;
;   opt_hscale_step == 3 : every destination dword is read from a
;                     source address only 3 bytes after the previous
;                     one (horizontal stretch).  Lines are copied in
;                     groups of 16 destination bytes plus a final
;                     12-byte tail, so callers arrange for w%16 == 12.
;   otherwise       : lines are copied 1:1 with rep movsd (w/4 dwords).
;
;   field != 0      : the destination line step is doubled; an odd
;                     field starts one screen line further down.
;
; bufh is not referenced.
; Clobb: eax, ecx, edx, es, flags (esi, edi, ebx preserved by USES)
;-----------------------------------------------------------------------
mve_ShowFrameField PROC USES ESI EDI EBX, buf:PTRBYTE, bufw:DWORD, bufh:DWORD, sx:DWORD, sy:DWORD, w:DWORD, h:DWORD, dstx:DWORD, dsty:DWORD, field:DWORD
        LOCAL   bank:DWORD
        LOCAL   w4:DWORD
        LOCAL   new_src_line:DWORD
        LOCAL   linestep:DWORD
        LOCAL   new_dst_line:DWORD

        mov     ax, ds                  ; Insure es==ds for symantec flat mode
        mov     es, ax

        mov     eax, w                  ; w4 = w>>2
        shr     eax, 2
        mov     w4, eax

;;; <WIP>
;;; In stretched width mode, we either keep 4/5 (a) of the source pixels,
;;; or duplicate every fourth pixel to magnify by 5/4 (b).
;;; In these cases, new_src_line is either bufw-w*5/4 (a) or bufw-w*4/5 (b).
;;; Let ScaleStep be 5 (a) or 3 (b) instead of 4.  This is the amount to advance
;;; the source after copying 32-bits from source to destination.
;;; The coordinate system used for the source will be a simulated scaled system.
;;; Rather than scale height, I plan to use alternate vertical resolutions.  However,
;;; it might be a good idea to also provide for scaled height in case we want a
;;; higher resolution border.
;;; Question: Do we still need to support transferring subrectangles?

        .if opt_hscale_step==4
            mov     eax, bufw           ; new_src_line = bufw - w
            sub     eax, w
            mov     new_src_line, eax
        .else
            mov     eax, opt_hscale_adj ; new_src_line = opt_hscale_adj
            mov     new_src_line, eax
        .endif

        mov     eax, sf_LineWidth       ; linestep = sf_LineWidth;
        .if field                       ; if (field)
            add     eax, eax            ;   linestep <<= 1;
        .endif
        mov     linestep, eax

        sub     eax, w                  ; new_dst_line = linestep - w;
        mov     new_dst_line, eax

        mov     eax, sy                 ; buf += sy*bufw + sx
        mul     bufw
        add     eax, sx
        add     buf, eax

        mov     eax, sx                 ; dstx += sx
        add     dstx, eax

        ; <WIP> This is a hack.  We should pass in src x,y of origin
        ; or make dstx/dsty absolute.
        ;
        mov     eax, bufw               ; if (field && sx >= (bufw>>1))
        shr     eax, 1
        .if field && sx >= eax
            sub     dstx, eax           ;   dstx -= bufw>>1
        .endif

        mov     eax, sy                 ; dsty += sy
        add     dsty, eax

    .if sf_SetBank==0   ;------------------ unbanked screen

        ; dst = WriteWinPtr + (dsty*linestep+dstx)
        mov     edi, sf_WriteWinPtr
        mov     eax, dsty
        mul     linestep
        add     eax, dstx
        add     edi, eax

        .if field & 1
            add     edi, sf_LineWidth   ; odd field: start one line lower
        .endif

        mov     eax, new_src_line
        mov     edx, new_dst_line
        mov     esi, buf
        mov     ebx, h                  ; ebx = lines left

        .if opt_hscale_step==3
            sub     edi, 8              ; loop below addresses dst as [edi+8]
sf_lp2a:    mov     ecx, w4
            shr     ecx, 2              ; ecx = number of 16-byte groups
            jz      sf_lp2t             ; none (w < 16): only the tail exists;
                                        ; without this the dec/jnz below would
                                        ; wrap and run ~2^32 times
            ALIGN 4
sf_lp2b:    mov     eax, [esi]
            mov     [edi+8], eax
            mov     eax, [esi+3]
            mov     [edi+12], eax
            add     edi, 16
            mov     eax, [esi+6]
            mov     [edi], eax
            mov     eax, [esi+9]
            mov     [edi+4], eax
            add     esi, 12
            dec     ecx
            jnz     sf_lp2b
sf_lp2t:
            ; To avoid the problem of the last pixel coming from the
            ; next line we arrange for w%16==12, so here is where we
            ; copy the last 12 pixels.
            mov     eax, [esi]
            mov     [edi+8], eax
            mov     eax, [esi+3]
            mov     [edi+12], eax
            add     edi, 12
            mov     eax, [esi+6]
            mov     [edi+4], eax
            add     esi, 9
            add     esi, new_src_line   ; (eax was reused as scratch above)
            add     edi, edx
            dec     ebx
            jnz     sf_lp2a
            add     edi, 8              ; undo the bias applied before the loop
        .else
sf_lp:      mov     ecx, w4             ;width/4
            rep movsd
            add     esi, eax
            add     edi, edx
            dec     ebx
            jnz     sf_lp
        .endif

    .else ; sf_SetBank  ;------------------ banked screen

        mov     esi, buf

        ; start = dsty * linestep + dstx
        mov     eax, linestep
        mul     dsty
        .if field & 1
            add     eax, sf_LineWidth
        .endif
        add     eax, dstx
        ; bank = start / WinGran
        ; dst = (start % WinGran) + sf_WriteWinPtr
        xor     edx, edx
        div     sf_WinGran
        mov     bank, eax
        mov     edi, edx
        add     edi, sf_WriteWinPtr

        ; Select new bank
        mov     bh, 0
        mov     bl, byte ptr sf_WriteWin
        mov     edx, bank
        call    sf_SetBank
        ; eax/edx destroyed by sf_SetBank

sf_0:   ; rem = sf_WriteWinLimit - dst
        mov     eax, sf_WriteWinLimit
        sub     eax, edi
        ; h2 = (rem+(LineWidth-w))/LineWidth
        add     eax, linestep
        sub     eax, w
        xor     edx, edx
        div     linestep
        ; if (h<h2) h2=h
        cmp     h, eax
        jae     skplim
        mov     eax, h
skplim: ; if (h2==0)  // No full lines can be transfered.
        or      eax, eax
        jz      sf_2
        ; h -= h2
        ; Transfer h2 lines to screen
        sub     h, eax
        mov     ebx, new_src_line
        mov     edx, new_dst_line
        .if opt_hscale_step==3
            sub     edi, 8              ; loop below addresses dst as [edi+8]
sf_1a3:     mov     ecx, w4
            shr     ecx, 2              ; ecx = number of 16-byte groups
            jz      sf_1t3              ; none (w < 16): only the tail exists
            ALIGN 4
sf_1b3:     mov     ebx, [esi]
            mov     [edi+8], ebx
            mov     ebx, [esi+3]
            mov     [edi+12], ebx
            add     edi, 16
            mov     ebx, [esi+6]
            mov     [edi], ebx
            mov     ebx, [esi+9]
            mov     [edi+4], ebx
            add     esi, 12
            dec     ecx
            jnz     sf_1b3
sf_1t3:
            ; To avoid the problem of the last pixel coming from the
            ; next line we arrange for w%16==12, so here is where we
            ; copy the last 12 pixels.
            mov     ebx, [esi]
            mov     [edi+8], ebx
            mov     ebx, [esi+3]
            mov     [edi+12], ebx
            add     edi, 12
            mov     ebx, [esi+6]
            mov     [edi+4], ebx
            add     esi, 9
            add     esi, new_src_line   ; (ebx was reused as scratch above)
            add     edi, edx
            dec     eax
            jnz     sf_1a3
            add     edi, 8              ; undo the bias applied before the loop
        .else
sf_1:       mov     ecx, w4             ; width/4
            rep movsd
            add     esi, ebx
            add     edi, edx
            dec     eax
            jnz     sf_1
        .endif

sf_2:   ; if (h!=0)   // There are still lines to be transfered
        ;             // transfer partial line
        ; eax is zero on every path that reaches this point, so the
        ; "or" simply tests h.
        or      eax, h
        jz      sf_9

        ; w4a = 0 max (sf_WriteWinLimit-dst)/4
        mov     ecx, sf_WriteWinLimit
        sub     ecx, edi
        sar     ecx, 2
        jns     sf_2b
        mov     ecx, 0
sf_2b:  push    ecx                     ; Save size of first half
        .if opt_hscale_step==3

            ALIGN 4
            or      ecx, ecx
            jz      sf_2c3
sf_2b3:     mov     eax, [esi]
            mov     [edi], eax
            add     esi, 3
            add     edi, 4
            dec     ecx
            jnz     sf_2b3
sf_2c3:

        .else
            rep movsd
        .endif

        ; bank += WinSize/WinGran       //Assumes WinSize%WinGran==0
        ; off -= (WinSize/WinGran)*WinGran == WinSize

        mov     eax, sf_WinGranPerSize
        add     bank, eax
        sub     edi, sf_WinSize

        ; Select new bank
        mov     bh, 0
        mov     bl, byte ptr sf_WriteWin
        mov     edx, bank
        call    sf_SetBank
        ; eax/edx destroyed by sf_SetBank

        ; w4b = w4-w4a  // Size of second half
        pop     eax                     ; Size of first half
        mov     ecx, w4
        sub     ecx, eax                ; Size of 2nd half
        .if opt_hscale_step==3
            ALIGN 4
            or      ecx, ecx
            jz      sf_8b3
sf_8a3:     mov     eax, [esi]
            mov     [edi], eax
            add     esi, 3
            add     edi, 4
            dec     ecx
            jnz     sf_8a3
sf_8b3:
        .else
            rep movsd
        .endif
        add     esi, new_src_line
        add     edi, new_dst_line
        ;; --h  // Count split line
        ;; if (h!=0) continue
        dec     h
        jnz     sf_0
sf_9:

    .endif ; sf_SetBank==0  ;------------------

        ret

mve_ShowFrameField ENDP
|
|
|
|
nfHPkDecomp PROC USES ESI EDI EBX, \
|
|
ops:PTRBYTE, comp:PTRBYTE, \
|
|
x:DWORD, y:DWORD, w:DWORD, h:DWORD
|
|
LOCAL tbuf: PTRBYTE
|
|
LOCAL new_row:DWORD
|
|
LOCAL DiffBufPtrs:DWORD
|
|
|
|
LOCAL nfpk_back_right: DWORD
|
|
LOCAL wcnt:DWORD
|
|
LOCAL bcomp:PTRBYTE
|
|
|
|
LOG_LABEL "StartPkDecomp"
|
|
|
|
.data
|
|
nfhpk_OpTbl label dword
|
|
dword offset nf0 ; Prev Same (0)
|
|
dword offset nf1 ; No change (and copied to screen) (0)
|
|
dword offset nf2 ; Near shift from older part of current buf (1)
|
|
dword offset nf3 ; Near shift from newer part of current buf (1)
|
|
dword offset nf4 ; Near shift from previous buffer (1)
|
|
dword offset nf5 ; Far shift from previous buffer (2)
|
|
dword offset nf6 ; Far shift from current buffer (2)
|
|
; [Or if COMPOPS, run of no changes (0)]
|
|
dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
|
|
dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
|
|
dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
|
|
; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
|
|
dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 4x8x2 (24 bytes)
|
|
dword offset nf11 ; 8x8x8 (64 bytes)
|
|
dword offset nf12 ; low 4x4x8 (16 bytes)
|
|
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
|
|
dword offset nf14 ; 8x8x0 (1 byte)
|
|
dword offset nf15 ; mix 8x8x0 (2 bytes)
|
|
.code
|
|
|
|
NF_DECOMP_INIT 1
|
|
|
|
mov eax, nf_back_right
|
|
sub eax, SWIDTH*2
|
|
mov nfpk_back_right, eax
|
|
|
|
mov esi, comp
|
|
mov edi, tbuf
|
|
|
|
xor eax, eax
|
|
mov ax, [esi]
|
|
add eax, esi
|
|
mov bcomp, eax
|
|
add esi, 2
|
|
|
|
nf_StartRow:
|
|
mov eax, w
|
|
shr eax, 1
|
|
mov wcnt,eax
|
|
ALIGN 4
|
|
nf_NextPair:
|
|
dec wcnt
|
|
js nf_NextRow
|
|
mov ebx, ops
|
|
mov al, [ebx]
|
|
inc ebx
|
|
mov ops, ebx
|
|
|
|
xor ebx, ebx
|
|
mov bl, al
|
|
shr bl, 4
|
|
and eax, 0Fh
|
|
push offset nf_NextPair
|
|
push nfhpk_OpTbl[ebx*4]
|
|
jmp nfhpk_OpTbl[eax*4]
|
|
|
|
nf_NextRow:
|
|
add edi, new_row
|
|
dec h
|
|
jnz nf_StartRow
|
|
LOG_LABEL "EndPkDecomp"
|
|
|
|
ret
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf0: ; No change from previous buffer
|
|
mov eax, DiffBufPtrs
|
|
jmp nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf1: ; No change (and copied to screen)
|
|
if 0 ;debug
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
add edi, SWIDTH*2
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf2: ; Near shift from older part of current buffer
|
|
xor eax, eax
|
|
mov ebx, bcomp
|
|
inc bcomp
|
|
mov al, [ebx]
|
|
mov ax, nfpk_ShiftP2[eax*2]
|
|
nf_xyc_shift:
|
|
xor ebx, ebx
|
|
mov bl, ah
|
|
shl eax, 24
|
|
sar eax, 24-1
|
|
add eax, nfpk_ShiftY[ebx*4]
|
|
jmp nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf3: ; Near shift from newer part of current buffer
|
|
xor eax, eax
|
|
mov ebx, bcomp
|
|
inc bcomp
|
|
mov al, [ebx]
|
|
mov ax, nfpk_ShiftP2[eax*2]
|
|
neg al
|
|
neg ah
|
|
jmp nf_xyc_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf4: ; Near shift from previous buffer
|
|
xor eax, eax
|
|
mov ebx, bcomp
|
|
inc bcomp
|
|
mov al, [ebx]
|
|
mov ax, nfpk_ShiftP1[eax*2]
|
|
jmp nf_xyp_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf5: ; Far shift from previous buffer
|
|
mov ax, [esi]
|
|
add esi, 2
|
|
nf_xyp_shift:
|
|
xor ebx, ebx
|
|
mov bl, ah
|
|
shl eax, 24
|
|
sar eax, 24-1
|
|
add eax, nfpk_ShiftY[ebx*4]
|
|
add eax, DiffBufPtrs
|
|
jmp nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
|
|
nf6: ; Far shift from current buffer
|
|
mov ax, [esi]
|
|
add esi, 2
|
|
jmp nf_xyc_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf_shift:
|
|
if 0 ;debug
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
mov ebx, esi ; save esi
|
|
lea esi, [edi+eax]
|
|
mov edx, nf_width
|
|
|
|
REPEAT 7
|
|
mov eax, [esi]
|
|
mov [edi], eax
|
|
mov eax, [esi+4]
|
|
mov [edi+4], eax
|
|
mov eax, [esi+8]
|
|
mov [edi+8], eax
|
|
mov eax, [esi+12]
|
|
mov [edi+12], eax
|
|
add esi, edx
|
|
add edi, edx
|
|
ENDM
|
|
mov eax, [esi]
|
|
mov [edi], eax
|
|
mov eax, [esi+4]
|
|
mov [edi+4], eax
|
|
mov eax, [esi+8]
|
|
mov [edi+8], eax
|
|
mov eax, [esi+12]
|
|
mov [edi+12], eax
|
|
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
mov esi, ebx ; restore esi
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf7: ; 8x8x1 (12 bytes)
|
|
|
|
test word ptr [esi], 08000h
|
|
jnz nf23
|
|
|
|
if 0 ;debug
|
|
add esi, 12
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfhpk_mov8
|
|
lea edx, byte ptr ds:nf7_11+1
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_11-nf7_11)], bl
|
|
mov [edx+(nf7_12-nf7_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_13-nf7_11)], bl
|
|
mov [edx+(nf7_14-nf7_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_21-nf7_11)], bl
|
|
mov [edx+(nf7_22-nf7_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_23-nf7_11)], bl
|
|
mov [edx+(nf7_24-nf7_11)], bh
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_31-nf7_11)], bl
|
|
mov [edx+(nf7_32-nf7_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_33-nf7_11)], bl
|
|
mov [edx+(nf7_34-nf7_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_41-nf7_11)], bl
|
|
mov [edx+(nf7_42-nf7_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_43-nf7_11)], bl
|
|
mov [edx+(nf7_44-nf7_11)], bh
|
|
|
|
lea edx, [edx+(nf7_51-nf7_11)]
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_51-nf7_51)], bl
|
|
mov [edx+(nf7_52-nf7_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_53-nf7_51)], bl
|
|
mov [edx+(nf7_54-nf7_51)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_61-nf7_51)], bl
|
|
mov [edx+(nf7_62-nf7_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_63-nf7_51)], bl
|
|
mov [edx+(nf7_64-nf7_51)], bh
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_71-nf7_51)], bl
|
|
mov [edx+(nf7_72-nf7_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_73-nf7_51)], bl
|
|
mov [edx+(nf7_74-nf7_51)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_81-nf7_51)], bl
|
|
mov [edx+(nf7_82-nf7_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_83-nf7_51)], bl
|
|
mov [edx+(nf7_84-nf7_51)], bh
|
|
|
|
push ebp
|
|
push esi
|
|
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
Trans16 cx, esi+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi
|
|
mov esi,nf_width
|
|
mov edx, ecx
|
|
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
jmp nf7_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf7_0:
|
|
nf7_11: mov [ebp+0], ebx
|
|
nf7_12: mov [ebp+4], ebx
|
|
nf7_13: mov [ebp+8], ebx
|
|
nf7_14: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_21: mov [ebp+0], ebx
|
|
nf7_22: mov [ebp+4], ebx
|
|
nf7_23: mov [ebp+8], ebx
|
|
nf7_24: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_31: mov [ebp+0], ebx
|
|
nf7_32: mov [ebp+4], ebx
|
|
nf7_33: mov [ebp+8], ebx
|
|
nf7_34: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_41: mov [ebp+0], ebx
|
|
nf7_42: mov [ebp+4], ebx
|
|
nf7_43: mov [ebp+8], ebx
|
|
nf7_44: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_51: mov [ebp+0], ebx
|
|
nf7_52: mov [ebp+4], ebx
|
|
nf7_53: mov [ebp+8], ebx
|
|
nf7_54: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_61: mov [ebp+0], ebx
|
|
nf7_62: mov [ebp+4], ebx
|
|
nf7_63: mov [ebp+8], ebx
|
|
nf7_64: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_71: mov [ebp+0], ebx
|
|
nf7_72: mov [ebp+4], ebx
|
|
nf7_73: mov [ebp+8], ebx
|
|
nf7_74: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_81: mov [ebp+0], ebx
|
|
nf7_82: mov [ebp+4], ebx
|
|
nf7_83: mov [ebp+8], ebx
|
|
nf7_84: mov [ebp+12], ebx
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 12
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf7+16
|
|
nf23: ; low 4x4x1 (6 bytes)
; Decodes one block from 6 bytes at [esi] and writes it at [edi].
;   [esi+0], [esi+2] : two colour words, fetched through the Trans16 macro
;   [esi+4], [esi+5] : pattern bytes; each nibble indexes nfhpk_mov4l
; Self-modifying: byte +1 (the ModRM byte) of each `mov eax, ebx` at
; nf23_11..nf23_74 is overwritten with a byte from the selected table entry
; before the store sequence at nf23_0 executes.
; On return esi has advanced by 6; edi has moved down 7 rows (7*nf_width)
; and then back by nfpk_back_right.
|
|
|
|
; Disabled debug path: would skip the 6 data bytes and jump to nf_solid with ebx = 0.
if 0 ;debug
|
|
add esi, 6
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
; eax is cleared once so that loading al alone yields a clean table index.
xor eax, eax
|
|
lea ecx, nfhpk_mov4l
|
|
; edx -> second byte of the first patchable instruction.
lea edx, byte ptr ds:nf23_11+1
|
|
|
|
; Low nibble of [esi+4] patches nf23_11..nf23_14.
mov al, [esi+4]
|
|
and al, 0fH
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf23_11-nf23_11)], bl
|
|
mov [edx+(nf23_12-nf23_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf23_13-nf23_11)], bl
|
|
mov [edx+(nf23_14-nf23_11)], bh
|
|
|
|
; High nibble of [esi+4] patches nf23_31..nf23_34.
mov al, [esi+4]
|
|
shr al, 4
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf23_31-nf23_11)], bl
|
|
mov [edx+(nf23_32-nf23_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf23_33-nf23_11)], bl
|
|
mov [edx+(nf23_34-nf23_11)], bh
|
|
|
|
|
|
; Low nibble of [esi+5] patches nf23_51..nf23_54.
mov al, [esi+5]
|
|
and al, 0fH
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf23_51-nf23_11)], bl
|
|
mov [edx+(nf23_52-nf23_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf23_53-nf23_11)], bl
|
|
mov [edx+(nf23_54-nf23_11)], bh
|
|
|
|
; High nibble of [esi+5] patches nf23_71..nf23_74.
mov al, [esi+5]
|
|
shr al, 4
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf23_71-nf23_11)], bl
|
|
mov [edx+(nf23_72-nf23_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf23_73-nf23_11)], bl
|
|
mov [edx+(nf23_74-nf23_11)], bh
|
|
|
|
; edx is reused from here on as the row pitch.
mov edx, nf_width
|
|
|
|
; load ebx,ecx with 00,11 color combinations
; (each register ends up with the same 16-bit colour in both halves)
|
|
|
|
; NOTE(review): Trans16 is defined elsewhere; the trailing ", 1" presumably
; strips the bit-15 flag from this colour word -- confirm against the macro.
Trans16 cx, esi, 1
|
|
; shrd moves cx into the high half of ebx; the mov then fills the low half.
shrd ebx, ecx, 16
|
|
mov bx, cx
|
|
Trans16 cx, esi+2
|
|
shrd eax, ecx, 16
|
|
mov ax, cx
|
|
mov ecx, eax
|
|
|
|
; The jump discards already-prefetched copies of the bytes patched above.
jmp nf23_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf23_0:
|
|
|
|
; Each patched `mov eax, <reg>` picks a colour dword, which is then stored
; to the same column of two consecutive rows ([edi] and [edi+edx]).
nf23_11:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
nf23_12:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
nf23_13:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
nf23_14:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
; Step down two rows.
lea edi, [edi+edx*2]
|
|
|
|
nf23_31:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
nf23_32:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
nf23_33:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
nf23_34:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf23_51:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
nf23_52:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
nf23_53:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
nf23_54:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf23_71:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
nf23_72:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
nf23_73:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
nf23_74:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
; Only one more row here, so edi ends 7 rows below where it started.
add edi, edx
|
|
|
|
sub edi, nfpk_back_right
|
|
; Consume the 6 bytes of block data.
add esi, 6
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf8: ; 2x2 4x4x1 (24 bytes)
; Decodes one block from 24 bytes at [esi] as four 6-byte groups:
;   group base +0 / +2 : two colour words (fetched through Trans16)
;   group base +4 / +5 : two pattern bytes, each indexing nfhpk_mov8
; Group bases are esi+0, esi+6, esi+12 and esi+18.
; If bit 15 of the first word is set, the data is handed to nf24 instead.
; Self-modifying: byte +1 (the ModRM byte) of every store labelled
; nf8_11..nf8_84 is overwritten from the table before the sequence at nf8_0
; runs.  As assembled those stores address [ebp+disp]; at run time ebp holds
; colour data and edi is the pointer advanced per row, so the patched byte
; presumably selects edi as base as well as the source register -- the table
; contents are not visible here.
; ebp and esi are saved and restored; on return esi has advanced by 24.
|
|
|
|
test word ptr [esi], 08000h
|
|
jnz nf24
|
|
|
|
; Disabled debug path: would skip the 24 data bytes and jump to nf_solid with ebx = 0.
if 0 ;debug
|
|
add esi, 24
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
; eax is cleared once so that loading al alone yields a clean table index.
xor eax, eax
|
|
lea ecx, nfhpk_mov8
|
|
; edx -> second byte of the first patchable store.
lea edx, byte ptr ds:nf8_11+1
|
|
|
|
; Group at esi+0: pattern bytes [esi+4], [esi+5] patch nf8_11..nf8_24.
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_11-nf8_11)], bl
|
|
mov [edx+(nf8_12-nf8_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_13-nf8_11)], bl
|
|
mov [edx+(nf8_14-nf8_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_21-nf8_11)], bl
|
|
mov [edx+(nf8_22-nf8_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_23-nf8_11)], bl
|
|
mov [edx+(nf8_24-nf8_11)], bh
|
|
|
|
|
|
; Group at esi+6: pattern bytes [esi+10], [esi+11] patch nf8_31..nf8_44.
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_31-nf8_11)], bl
|
|
mov [edx+(nf8_32-nf8_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_33-nf8_11)], bl
|
|
mov [edx+(nf8_34-nf8_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_41-nf8_11)], bl
|
|
mov [edx+(nf8_42-nf8_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_43-nf8_11)], bl
|
|
mov [edx+(nf8_44-nf8_11)], bh
|
|
|
|
; Re-base the patch pointer on nf8_51 for the second half of the stores.
add edx, nf8_51-nf8_11
|
|
|
|
; Group at esi+12: pattern bytes [esi+16], [esi+17] patch nf8_51..nf8_64.
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_51-nf8_51)], bl
|
|
mov [edx+(nf8_52-nf8_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_53-nf8_51)], bl
|
|
mov [edx+(nf8_54-nf8_51)], bh
|
|
|
|
mov al, [esi+17]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_61-nf8_51)], bl
|
|
mov [edx+(nf8_62-nf8_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_63-nf8_51)], bl
|
|
mov [edx+(nf8_64-nf8_51)], bh
|
|
|
|
|
|
; Group at esi+18: pattern bytes [esi+22], [esi+23] patch nf8_71..nf8_84.
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_71-nf8_51)], bl
|
|
mov [edx+(nf8_72-nf8_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_73-nf8_51)], bl
|
|
mov [edx+(nf8_74-nf8_51)], bh
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_81-nf8_51)], bl
|
|
mov [edx+(nf8_82-nf8_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_83-nf8_51)], bl
|
|
mov [edx+(nf8_84-nf8_51)], bh
|
|
|
|
|
|
; ebp and esi are about to be reused for colour data and the row pitch.
push ebp
|
|
push esi
|
|
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
|
|
; Colour pairs for the groups at esi+18, esi+12 and esi+6 are packed into
; ecx (second colour in the high half, first in the low half) and pushed,
; so they pop back in the order 6, 12, 18.
Trans16 cx, esi+18+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi+18
|
|
push ecx
|
|
|
|
Trans16 cx, esi+12+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi+12
|
|
push ecx
|
|
|
|
Trans16 cx, esi+6+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi+6
|
|
push ecx
|
|
|
|
; The pair for the group at esi+0 stays in ecx and is used first.
Trans16 cx, esi+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi
|
|
|
|
; esi now holds the row pitch; the data pointer is on the stack.
mov esi,nf_width
|
|
; Expand the pair in ecx into the four combinations:
; edx = ecx with halves swapped, ebx = first colour in both halves,
; ebp = second colour in both halves.
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
; The jump discards already-prefetched copies of the bytes patched above.
jmp nf8_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf8_0:
|
|
; First group: four rows of two dword stores each.
nf8_11: mov [ebp+0], ebx
|
|
nf8_12: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_13: mov [ebp+0], ebx
|
|
nf8_14: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf8_21: mov [ebp+0], ebx
|
|
nf8_22: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_23: mov [ebp+0], ebx
|
|
nf8_24: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
; Colour pair for the group at esi+6; same expansion as above.
pop ecx
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
|
|
nf8_31: mov [ebp+0], ebx
|
|
nf8_32: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_33: mov [ebp+0], ebx
|
|
nf8_34: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf8_41: mov [ebp+0], ebx
|
|
nf8_42: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_43: mov [ebp+0], ebx
|
|
nf8_44: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
; Go back up the 8 rows just written and 8 bytes to the right.
lea eax, [esi*8-8]
|
|
sub edi, eax
|
|
|
|
; Colour pair for the group at esi+12.
pop ecx
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
nf8_51: mov [ebp+0], ebx
|
|
nf8_52: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_53: mov [ebp+0], ebx
|
|
nf8_54: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf8_61: mov [ebp+0], ebx
|
|
nf8_62: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_63: mov [ebp+0], ebx
|
|
nf8_64: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
; Colour pair for the group at esi+18.
pop ecx
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
nf8_71: mov [ebp+0], ebx
|
|
nf8_72: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_73: mov [ebp+0], ebx
|
|
nf8_74: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf8_81: mov [ebp+0], ebx
|
|
nf8_82: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_83: mov [ebp+0], ebx
|
|
nf8_84: mov [ebp+4], ebx
|
|
|
|
; Restore the data pointer and the caller's ebp, then consume 24 bytes.
pop esi
|
|
pop ebp
|
|
add esi, 24
|
|
; Undo the 8-byte shift to the right-hand column before the common adjustment.
sub edi, 8
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+16
|
|
nf24: ; 2x1 4x8x1 (16 bytes)
; Decodes one block from 16 bytes at [esi] as two 8-byte groups:
;   group base +0 / +2 : two colour words (fetched through Trans16)
;   group base +4..+7  : four pattern bytes, each indexing nfhpk_mov8
; Group bases are esi+0 and esi+8.  Reached from nf8 when bit 15 of the word
; at [esi] is set; if bit 15 of the word at [esi+8] is also set the data is
; handed on to nf40.
; Self-modifying: byte +1 (the ModRM byte) of every store labelled
; nf24_11..nf24_84 is overwritten from the table before the sequence at
; nf24_0 runs.  As assembled those stores address [ebp+disp]; the patched
; byte presumably selects edi as base as well as the source register -- the
; table contents are not visible here.
; ebp and esi are saved and restored; on return esi has advanced by 16.
|
|
|
|
test word ptr [esi+8], 08000h
|
|
jnz nf40
|
|
|
|
; Disabled debug path: would skip the 16 data bytes and jump to nf_solid with ebx = 0.
if 0 ;debug
|
|
add esi, 16
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
; eax is cleared once so that loading al alone yields a clean table index.
xor eax, eax
|
|
lea ecx, nfhpk_mov8
|
|
; edx -> second byte of the first patchable store.
lea edx, byte ptr ds:nf24_11+1
|
|
|
|
; First group: pattern bytes [esi+4]..[esi+7] patch nf24_11..nf24_44.
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_11-nf24_11)], bl
|
|
mov [edx+(nf24_12-nf24_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_13-nf24_11)], bl
|
|
mov [edx+(nf24_14-nf24_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_21-nf24_11)], bl
|
|
mov [edx+(nf24_22-nf24_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_23-nf24_11)], bl
|
|
mov [edx+(nf24_24-nf24_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_31-nf24_11)], bl
|
|
mov [edx+(nf24_32-nf24_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_33-nf24_11)], bl
|
|
mov [edx+(nf24_34-nf24_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_41-nf24_11)], bl
|
|
mov [edx+(nf24_42-nf24_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_43-nf24_11)], bl
|
|
mov [edx+(nf24_44-nf24_11)], bh
|
|
|
|
; Re-base the patch pointer on nf24_51 for the second half of the stores.
add edx, nf24_51-nf24_11
|
|
|
|
; Second group: pattern bytes [esi+12]..[esi+15] patch nf24_51..nf24_84.
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_51-nf24_51)], bl
|
|
mov [edx+(nf24_52-nf24_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_53-nf24_51)], bl
|
|
mov [edx+(nf24_54-nf24_51)], bh
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_61-nf24_51)], bl
|
|
mov [edx+(nf24_62-nf24_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_63-nf24_51)], bl
|
|
mov [edx+(nf24_64-nf24_51)], bh
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_71-nf24_51)], bl
|
|
mov [edx+(nf24_72-nf24_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_73-nf24_51)], bl
|
|
mov [edx+(nf24_74-nf24_51)], bh
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_81-nf24_51)], bl
|
|
mov [edx+(nf24_82-nf24_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_83-nf24_51)], bl
|
|
mov [edx+(nf24_84-nf24_51)], bh
|
|
|
|
|
|
; ebp and esi are about to be reused for colour data and the row pitch.
push ebp
|
|
push esi
|
|
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
|
|
; Colour pair for the second group, packed into ecx and saved for later.
Trans16 cx, esi+8+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi+8
|
|
push ecx
|
|
|
|
Trans16 cx, esi+2
|
|
shl ecx, 16
|
|
; NOTE(review): the word at [esi] has bit 15 set on this path (see the test
; in nf8); the ", 1" argument presumably makes Trans16 strip it -- confirm
; against the macro definition.
Trans16 cx, esi, 1
|
|
; esi now holds the row pitch; the data pointer is on the stack.
mov esi,nf_width
|
|
; Expand the pair in ecx: edx = halves swapped, ebx = first colour in both
; halves, ebp = second colour in both halves.
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
; The jump discards already-prefetched copies of the bytes patched above.
jmp nf24_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf24_0:
|
|
|
|
; First group: eight rows of two dword stores each.
nf24_11:mov [ebp+0], ebx
|
|
nf24_12:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_13:mov [ebp+0], ebx
|
|
nf24_14:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf24_21:mov [ebp+0], ebx
|
|
nf24_22:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_23:mov [ebp+0], ebx
|
|
nf24_24:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf24_31:mov [ebp+0], ebx
|
|
nf24_32:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_33:mov [ebp+0], ebx
|
|
nf24_34:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf24_41:mov [ebp+0], ebx
|
|
nf24_42:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_43:mov [ebp+0], ebx
|
|
nf24_44:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
; Go back up the 8 rows just written and 8 bytes to the right.
lea eax, [esi*8-8]
|
|
sub edi, eax
|
|
|
|
; Colour pair for the second group; same expansion as above.
pop ecx
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
nf24_51:mov [ebp+0], ebx
|
|
nf24_52:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_53:mov [ebp+0], ebx
|
|
nf24_54:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf24_61:mov [ebp+0], ebx
|
|
nf24_62:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_63:mov [ebp+0], ebx
|
|
nf24_64:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf24_71:mov [ebp+0], ebx
|
|
nf24_72:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_73:mov [ebp+0], ebx
|
|
nf24_74:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf24_81:mov [ebp+0], ebx
|
|
nf24_82:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_83:mov [ebp+0], ebx
|
|
nf24_84:mov [ebp+4], ebx
|
|
|
|
; Restore the data pointer and the caller's ebp, then consume 16 bytes.
pop esi
|
|
pop ebp
|
|
add esi, 16
|
|
; Undo the 8-byte shift to the right-hand column before the common adjustment.
sub edi, 8
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+32
|
|
nf40: ; 1x2 8x4x1 (16 bytes)
; Decodes one block from 16 bytes at [esi] as two 8-byte groups:
;   group base +0 / +2 : two colour words (fetched through Trans16)
;   group base +4..+7  : four pattern bytes, each indexing nfhpk_mov8
; Group bases are esi+0 and esi+8; each group fills four full-width rows
; (four dword stores per row).  Reached from nf24 when bit 15 of the word at
; [esi+8] is set.
; Self-modifying: byte +1 (the ModRM byte) of every store labelled
; nf40_11..nf40_84 is overwritten from the table before the sequence at
; nf40_0 runs.  As assembled those stores address [ebp+disp]; the patched
; byte presumably selects edi as base as well as the source register -- the
; table contents are not visible here.
; ebp and esi are saved and restored; on return esi has advanced by 16.
|
|
|
|
; Disabled debug path: would skip the 16 data bytes and jump to nf_solid with ebx = 0.
if 0 ;debug
|
|
add esi, 16
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
; eax is cleared once so that loading al alone yields a clean table index.
xor eax, eax
|
|
lea ecx, nfhpk_mov8
|
|
; edx -> second byte of the first patchable store.
lea edx, byte ptr ds:nf40_11+1
|
|
|
|
; First group: one pattern byte per row, [esi+4]..[esi+7] -> rows 1..4.
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_11-nf40_11)], bl
|
|
mov [edx+(nf40_12-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_13-nf40_11)], bl
|
|
mov [edx+(nf40_14-nf40_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_21-nf40_11)], bl
|
|
mov [edx+(nf40_22-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_23-nf40_11)], bl
|
|
mov [edx+(nf40_24-nf40_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_31-nf40_11)], bl
|
|
mov [edx+(nf40_32-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_33-nf40_11)], bl
|
|
mov [edx+(nf40_34-nf40_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_41-nf40_11)], bl
|
|
mov [edx+(nf40_42-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_43-nf40_11)], bl
|
|
mov [edx+(nf40_44-nf40_11)], bh
|
|
|
|
; Re-base the patch pointer on nf40_51 for the second half of the stores.
add edx, nf40_51-nf40_11
|
|
|
|
; Second group: [esi+12]..[esi+15] -> rows 5..8.
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_51-nf40_51)], bl
|
|
mov [edx+(nf40_52-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_53-nf40_51)], bl
|
|
mov [edx+(nf40_54-nf40_51)], bh
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_61-nf40_51)], bl
|
|
mov [edx+(nf40_62-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_63-nf40_51)], bl
|
|
mov [edx+(nf40_64-nf40_51)], bh
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_71-nf40_51)], bl
|
|
mov [edx+(nf40_72-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_73-nf40_51)], bl
|
|
mov [edx+(nf40_74-nf40_51)], bh
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_81-nf40_51)], bl
|
|
mov [edx+(nf40_82-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_83-nf40_51)], bl
|
|
mov [edx+(nf40_84-nf40_51)], bh
|
|
|
|
|
|
; ebp and esi are about to be reused for colour data and the row pitch.
push ebp
|
|
push esi
|
|
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
; Colour pair for the second group, packed into ecx and saved for later.
; NOTE(review): both [esi] and [esi+8] have bit 15 set on this path (see the
; tests in nf8 and nf24); the ", 1" argument presumably makes Trans16 strip
; it -- confirm against the macro definition.
Trans16 cx, esi+8+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi+8, 1
|
|
push ecx
|
|
|
|
Trans16 cx, esi+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi, 1
|
|
; esi now holds the row pitch; the data pointer is on the stack.
mov esi,nf_width
|
|
; Expand the pair in ecx: edx = halves swapped, ebx = first colour in both
; halves, ebp = second colour in both halves.
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
; The jump discards already-prefetched copies of the bytes patched above.
jmp nf40_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf40_0:
|
|
|
|
nf40_11:mov [ebp+0], ebx
|
|
nf40_12:mov [ebp+4], ebx
|
|
nf40_13:mov [ebp+8], ebx
|
|
nf40_14:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_21:mov [ebp+0], ebx
|
|
nf40_22:mov [ebp+4], ebx
|
|
nf40_23:mov [ebp+8], ebx
|
|
nf40_24:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_31:mov [ebp+0], ebx
|
|
nf40_32:mov [ebp+4], ebx
|
|
nf40_33:mov [ebp+8], ebx
|
|
nf40_34:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_41:mov [ebp+0], ebx
|
|
nf40_42:mov [ebp+4], ebx
|
|
nf40_43:mov [ebp+8], ebx
|
|
nf40_44:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
; Colour pair for the second group; same expansion as above.
pop ecx
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
nf40_51:mov [ebp+0], ebx
|
|
nf40_52:mov [ebp+4], ebx
|
|
nf40_53:mov [ebp+8], ebx
|
|
nf40_54:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_61:mov [ebp+0], ebx
|
|
nf40_62:mov [ebp+4], ebx
|
|
nf40_63:mov [ebp+8], ebx
|
|
nf40_64:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_71:mov [ebp+0], ebx
|
|
nf40_72:mov [ebp+4], ebx
|
|
nf40_73:mov [ebp+8], ebx
|
|
nf40_74:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_81:mov [ebp+0], ebx
|
|
nf40_82:mov [ebp+4], ebx
|
|
nf40_83:mov [ebp+8], ebx
|
|
nf40_84:mov [ebp+12], ebx
|
|
|
|
; Restore the data pointer and the caller's ebp, then consume 16 bytes.
pop esi
|
|
pop ebp
|
|
add esi, 16
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf9: ; 8x8x2 (24 bytes)
; Decodes one block from 24 bytes at [esi]:
;   [esi+0], +2, +4, +6 : four colour words (fetched through Trans16)
;   [esi+8]..[esi+23]   : 16 pattern bytes, two per row, each indexing
;                         nfhpk_mov4
; Dispatch: bit 15 of the word at [esi] set -> nf41; otherwise bit 15 of the
; word at [esi+4] set -> nf25; otherwise decoded here.
; Self-modifying: byte +2 of each `mov ax, bx` at nf9_11..nf9_88 is
; overwritten from the table before the sequence at nf9_0 runs.  The
; instructions are 16-bit moves in 32-bit code, so byte +2 is the ModRM byte
; after the operand-size prefix and opcode.
; ebp and esi are saved and restored; on return esi has advanced by 24 and
; edi has moved down 7 rows and then back by nfpk_back_right.
|
|
|
|
test word ptr [esi], 08000h
|
|
jnz nf41
|
|
|
|
test word ptr [esi+4], 08000h
|
|
jnz nf25
|
|
|
|
; Disabled debug path: would skip the 24 data bytes and jump to nf_solid with ebx = 0.
if 0 ;debug
|
|
add esi, 24
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
; eax is cleared once so that loading al alone yields a clean table index.
xor eax, eax
|
|
lea ecx, nfhpk_mov4
|
|
; edx -> third byte of the first patchable instruction.
lea edx, byte ptr ds:nf9_11+2
|
|
|
|
; In each pair the first `mov ax` is shifted into the high half of eax and
; the second fills the low half, so the table bytes are applied in the order
; bh, bl (and again after the shift).
; Row 1: [esi+8], [esi+9].
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_11-nf9_11)], bh
|
|
mov [edx+(nf9_12-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_13-nf9_11)], bh
|
|
mov [edx+(nf9_14-nf9_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_15-nf9_11)], bh
|
|
mov [edx+(nf9_16-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_17-nf9_11)], bh
|
|
mov [edx+(nf9_18-nf9_11)], bl
|
|
|
|
|
|
; Row 2: [esi+10], [esi+11].
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_21-nf9_11)], bh
|
|
mov [edx+(nf9_22-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_23-nf9_11)], bh
|
|
mov [edx+(nf9_24-nf9_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_25-nf9_11)], bh
|
|
mov [edx+(nf9_26-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_27-nf9_11)], bh
|
|
mov [edx+(nf9_28-nf9_11)], bl
|
|
|
|
|
|
; Row 3: [esi+12], [esi+13].
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_31-nf9_11)], bh
|
|
mov [edx+(nf9_32-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_33-nf9_11)], bh
|
|
mov [edx+(nf9_34-nf9_11)], bl
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_35-nf9_11)], bh
|
|
mov [edx+(nf9_36-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_37-nf9_11)], bh
|
|
mov [edx+(nf9_38-nf9_11)], bl
|
|
|
|
|
|
; Row 4: [esi+14], [esi+15].
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_41-nf9_11)], bh
|
|
mov [edx+(nf9_42-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_43-nf9_11)], bh
|
|
mov [edx+(nf9_44-nf9_11)], bl
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_45-nf9_11)], bh
|
|
mov [edx+(nf9_46-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_47-nf9_11)], bh
|
|
mov [edx+(nf9_48-nf9_11)], bl
|
|
|
|
|
|
; Re-base the patch pointer on nf9_51 for rows 5..8.
lea edx, [edx+(nf9_51-nf9_11)]
|
|
|
|
; Row 5: [esi+16], [esi+17].
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_51-nf9_51)], bh
|
|
mov [edx+(nf9_52-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_53-nf9_51)], bh
|
|
mov [edx+(nf9_54-nf9_51)], bl
|
|
|
|
mov al, [esi+17]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_55-nf9_51)], bh
|
|
mov [edx+(nf9_56-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_57-nf9_51)], bh
|
|
mov [edx+(nf9_58-nf9_51)], bl
|
|
|
|
|
|
; Row 6: [esi+18], [esi+19].
mov al, [esi+18]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_61-nf9_51)], bh
|
|
mov [edx+(nf9_62-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_63-nf9_51)], bh
|
|
mov [edx+(nf9_64-nf9_51)], bl
|
|
|
|
mov al, [esi+19]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_65-nf9_51)], bh
|
|
mov [edx+(nf9_66-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_67-nf9_51)], bh
|
|
mov [edx+(nf9_68-nf9_51)], bl
|
|
|
|
|
|
; Row 7: [esi+20], [esi+21].
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_71-nf9_51)], bh
|
|
mov [edx+(nf9_72-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_73-nf9_51)], bh
|
|
mov [edx+(nf9_74-nf9_51)], bl
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_75-nf9_51)], bh
|
|
mov [edx+(nf9_76-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_77-nf9_51)], bh
|
|
mov [edx+(nf9_78-nf9_51)], bl
|
|
|
|
|
|
; Row 8: [esi+22], [esi+23].
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_81-nf9_51)], bh
|
|
mov [edx+(nf9_82-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_83-nf9_51)], bh
|
|
mov [edx+(nf9_84-nf9_51)], bl
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_85-nf9_51)], bh
|
|
mov [edx+(nf9_86-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_87-nf9_51)], bh
|
|
mov [edx+(nf9_88-nf9_51)], bl
|
|
|
|
; ebp and esi are about to be reused for a colour and the row pitch.
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
|
|
Trans16 bx, esi
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4
|
|
Trans16 bp, esi+6
|
|
; esi now holds the row pitch; the data pointer is on the stack.
mov esi, nf_width
|
|
|
|
; The jump discards already-prefetched copies of the bytes patched above.
jmp nf9_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf9_0:
|
|
|
|
; Each row: four dwords, each assembled from two patched 16-bit moves
; (first one shifted into the high half) and stored at [edi+0/4/8/12].
nf9_11: mov ax, bx
|
|
shl eax, 16
|
|
nf9_12: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_13: mov ax, bx
|
|
shl eax, 16
|
|
nf9_14: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_15: mov ax, bx
|
|
shl eax, 16
|
|
nf9_16: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_17: mov ax, bx
|
|
shl eax, 16
|
|
nf9_18: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_21: mov ax, bx
|
|
shl eax, 16
|
|
nf9_22: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_23: mov ax, bx
|
|
shl eax, 16
|
|
nf9_24: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_25: mov ax, bx
|
|
shl eax, 16
|
|
nf9_26: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_27: mov ax, bx
|
|
shl eax, 16
|
|
nf9_28: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_31: mov ax, bx
|
|
shl eax, 16
|
|
nf9_32: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_33: mov ax, bx
|
|
shl eax, 16
|
|
nf9_34: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_35: mov ax, bx
|
|
shl eax, 16
|
|
nf9_36: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_37: mov ax, bx
|
|
shl eax, 16
|
|
nf9_38: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_41: mov ax, bx
|
|
shl eax, 16
|
|
nf9_42: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_43: mov ax, bx
|
|
shl eax, 16
|
|
nf9_44: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_45: mov ax, bx
|
|
shl eax, 16
|
|
nf9_46: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_47: mov ax, bx
|
|
shl eax, 16
|
|
nf9_48: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_51: mov ax, bx
|
|
shl eax, 16
|
|
nf9_52: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_53: mov ax, bx
|
|
shl eax, 16
|
|
nf9_54: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_55: mov ax, bx
|
|
shl eax, 16
|
|
nf9_56: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_57: mov ax, bx
|
|
shl eax, 16
|
|
nf9_58: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_61: mov ax, bx
|
|
shl eax, 16
|
|
nf9_62: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_63: mov ax, bx
|
|
shl eax, 16
|
|
nf9_64: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_65: mov ax, bx
|
|
shl eax, 16
|
|
nf9_66: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_67: mov ax, bx
|
|
shl eax, 16
|
|
nf9_68: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_71: mov ax, bx
|
|
shl eax, 16
|
|
nf9_72: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_73: mov ax, bx
|
|
shl eax, 16
|
|
nf9_74: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_75: mov ax, bx
|
|
shl eax, 16
|
|
nf9_76: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_77: mov ax, bx
|
|
shl eax, 16
|
|
nf9_78: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_81: mov ax, bx
|
|
shl eax, 16
|
|
nf9_82: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_83: mov ax, bx
|
|
shl eax, 16
|
|
nf9_84: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_85: mov ax, bx
|
|
shl eax, 16
|
|
nf9_86: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_87: mov ax, bx
|
|
shl eax, 16
|
|
nf9_88: mov ax, bx
|
|
mov [edi+12], eax
|
|
|
|
; Restore the data pointer and the caller's ebp, then consume 24 bytes.
pop esi
|
|
pop ebp
|
|
add esi, 24
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+16
|
|
nf25: ; low 4x4x2 (12 bytes)
; Decodes one block from 12 bytes at [esi]:
;   [esi+0], +2, +4, +6 : four colour words (fetched through Trans16)
;   [esi+8]..[esi+11]   : four pattern bytes, each indexing nfhpk_mov4
; Reached from nf9 when bit 15 of [esi] is clear and bit 15 of [esi+4] is set.
; Each pattern byte controls one group of four `mov eax, <reg>` selections;
; every selected dword is stored to two consecutive rows.
; Self-modifying: byte +1 (the ModRM byte) of each `mov eax, ebx` at
; nf25_11..nf25_44 is overwritten from the table before the sequence at
; nf25_0 runs.
; ebp and esi are saved and restored; on return esi has advanced by 12 and
; edi has moved down 7 rows and then back by nfpk_back_right.
|
|
|
|
; Disabled debug path: would skip the 12 data bytes and jump to nf_solid with ebx = 0.
if 0 ;debug
|
|
add esi, 12
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
; eax is cleared once so that loading al alone yields a clean table index.
xor eax, eax
|
|
lea ecx, nfhpk_mov4
|
|
; edx -> second byte of the first patchable instruction.
lea edx, byte ptr ds:nf25_11+1
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_11-nf25_11)], bl
|
|
mov [edx+(nf25_12-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_13-nf25_11)], bl
|
|
mov [edx+(nf25_14-nf25_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_21-nf25_11)], bl
|
|
mov [edx+(nf25_22-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_23-nf25_11)], bl
|
|
mov [edx+(nf25_24-nf25_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_31-nf25_11)], bl
|
|
mov [edx+(nf25_32-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_33-nf25_11)], bl
|
|
mov [edx+(nf25_34-nf25_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_41-nf25_11)], bl
|
|
mov [edx+(nf25_42-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_43-nf25_11)], bl
|
|
mov [edx+(nf25_44-nf25_11)], bh
|
|
|
|
; ebp and esi are about to be reused for a colour and the row pitch.
push ebp
|
|
push esi
|
|
; Load ebx,edx,ecx,ebp with four colors, duplicated in high order.
; (shrd moves cx into the high half of the target; the 16-bit mov then
; fills the low half with the same colour)
|
|
Trans16 cx, esi
|
|
shrd ebx, ecx, 16
|
|
mov bx, cx
|
|
Trans16 cx, esi+2
|
|
shrd edx, ecx, 16
|
|
mov dx, cx
|
|
; NOTE(review): the word at [esi+4] has bit 15 set on this path (see the
; test in nf9); the ", 1" argument presumably makes Trans16 strip it --
; confirm against the macro definition.
Trans16 cx, esi+4, 1
|
|
shrd eax, ecx, 16
|
|
mov ax, cx
|
|
; The third colour is parked on the stack because cx is still needed as
; Trans16's destination for the fourth; it is popped into ecx afterwards.
push eax
|
|
Trans16 cx, esi+6
|
|
shrd ebp, ecx, 16
|
|
mov bp, cx
|
|
pop ecx
|
|
; esi now holds the row pitch; the data pointer is on the stack.
mov esi, nf_width
|
|
|
|
; The jump discards already-prefetched copies of the bytes patched above.
jmp nf25_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf25_0:
|
|
|
|
nf25_11:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf25_12:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf25_13:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf25_14:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
; Step down two rows.
lea edi, [edi+esi*2]
|
|
|
|
nf25_21:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf25_22:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf25_23:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf25_24:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf25_31:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf25_32:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf25_33:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf25_34:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf25_41:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf25_42:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf25_43:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf25_44:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
|
|
; Only one more row here, so edi ends 7 rows below where it started.
add edi, esi
|
|
|
|
; Restore the data pointer and the caller's ebp, then consume 12 bytes.
pop esi
|
|
pop ebp
|
|
add esi, 12
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+32
|
|
nf41: ; low 4x8x2 (16 bytes)
; Decodes one block from 16 bytes at [esi]:
;   [esi+0], +2, +4, +6 : four colour words (fetched through Trans16)
;   [esi+8]..[esi+15]   : eight pattern bytes, one per row, each indexing
;                         nfhpk_mov8
; Reached from nf9 when bit 15 of [esi] is set; if bit 15 of [esi+4] is also
; set the data is handed on to nf57.
; Self-modifying: byte +1 (the ModRM byte) of every store labelled
; nf41_11..nf41_84 is overwritten from the table before the sequence at
; nf41_0 runs.  As assembled those stores address [ebp+disp]; the patched
; byte presumably selects edi as base as well as the source register -- the
; table contents are not visible here.
; ebp and esi are saved and restored; on return esi has advanced by 16 and
; edi has moved down 7 rows and then back by nfpk_back_right.
|
|
test word ptr [esi+4], 08000h
|
|
jnz nf57
|
|
|
|
; Disabled debug path: would skip the 16 data bytes and jump to nf_solid with ebx = 0.
if 0 ;debug
|
|
add esi, 16
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
; eax is cleared once so that loading al alone yields a clean table index.
xor eax, eax
|
|
lea ecx, nfhpk_mov8
|
|
; edx -> second byte of the first patchable store.
lea edx, byte ptr ds:nf41_11+1
|
|
|
|
; Rows 1..4: [esi+8]..[esi+11].
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_11-nf41_11)], bl
|
|
mov [edx+(nf41_12-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_13-nf41_11)], bl
|
|
mov [edx+(nf41_14-nf41_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_21-nf41_11)], bl
|
|
mov [edx+(nf41_22-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_23-nf41_11)], bl
|
|
mov [edx+(nf41_24-nf41_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_31-nf41_11)], bl
|
|
mov [edx+(nf41_32-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_33-nf41_11)], bl
|
|
mov [edx+(nf41_34-nf41_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_41-nf41_11)], bl
|
|
mov [edx+(nf41_42-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_43-nf41_11)], bl
|
|
mov [edx+(nf41_44-nf41_11)], bh
|
|
|
|
; Re-base the patch pointer on nf41_51 for rows 5..8.
lea edx, [edx+(nf41_51-nf41_11)]
|
|
|
|
; Rows 5..8: [esi+12]..[esi+15].
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_51-nf41_51)], bl
|
|
mov [edx+(nf41_52-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_53-nf41_51)], bl
|
|
mov [edx+(nf41_54-nf41_51)], bh
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_61-nf41_51)], bl
|
|
mov [edx+(nf41_62-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_63-nf41_51)], bl
|
|
mov [edx+(nf41_64-nf41_51)], bh
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_71-nf41_51)], bl
|
|
mov [edx+(nf41_72-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_73-nf41_51)], bl
|
|
mov [edx+(nf41_74-nf41_51)], bh
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_81-nf41_51)], bl
|
|
mov [edx+(nf41_82-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_83-nf41_51)], bl
|
|
mov [edx+(nf41_84-nf41_51)], bh
|
|
|
|
; ebp and esi are about to be reused for a colour and the row pitch.
push ebp
|
|
push esi
|
|
; Load ebx,edx,ecx,ebp with four colors, duplicated in high order.
; (shrd moves cx into the high half of the target; the 16-bit mov then
; fills the low half with the same colour)
|
|
; NOTE(review): the word at [esi] has bit 15 set on this path (see the test
; in nf9); the ", 1" argument presumably makes Trans16 strip it -- confirm
; against the macro definition.
Trans16 cx, esi, 1
|
|
shrd ebx, ecx, 16
|
|
mov bx, cx
|
|
Trans16 cx, esi+2
|
|
shrd edx, ecx, 16
|
|
mov dx, cx
|
|
Trans16 cx, esi+4
|
|
shrd eax, ecx, 16
|
|
mov ax, cx
|
|
; The third colour is parked on the stack because cx is still needed as
; Trans16's destination for the fourth; it is popped into ecx afterwards.
push eax
|
|
Trans16 cx, esi+6
|
|
shrd ebp, ecx, 16
|
|
mov bp, cx
|
|
pop ecx
|
|
; esi now holds the row pitch; the data pointer is on the stack.
mov esi, nf_width
|
|
|
|
; The jump discards already-prefetched copies of the bytes patched above.
jmp nf41_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf41_0:
|
|
|
|
; Eight rows of four dword stores each.
nf41_11:mov [ebp+0], ebx
|
|
nf41_12:mov [ebp+4], ebx
|
|
nf41_13:mov [ebp+8], ebx
|
|
nf41_14:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_21:mov [ebp+0], ebx
|
|
nf41_22:mov [ebp+4], ebx
|
|
nf41_23:mov [ebp+8], ebx
|
|
nf41_24:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_31:mov [ebp+0], ebx
|
|
nf41_32:mov [ebp+4], ebx
|
|
nf41_33:mov [ebp+8], ebx
|
|
nf41_34:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_41:mov [ebp+0], ebx
|
|
nf41_42:mov [ebp+4], ebx
|
|
nf41_43:mov [ebp+8], ebx
|
|
nf41_44:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_51:mov [ebp+0], ebx
|
|
nf41_52:mov [ebp+4], ebx
|
|
nf41_53:mov [ebp+8], ebx
|
|
nf41_54:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_61:mov [ebp+0], ebx
|
|
nf41_62:mov [ebp+4], ebx
|
|
nf41_63:mov [ebp+8], ebx
|
|
nf41_64:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_71:mov [ebp+0], ebx
|
|
nf41_72:mov [ebp+4], ebx
|
|
nf41_73:mov [ebp+8], ebx
|
|
nf41_74:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_81:mov [ebp+0], ebx
|
|
nf41_82:mov [ebp+4], ebx
|
|
nf41_83:mov [ebp+8], ebx
|
|
nf41_84:mov [ebp+12], ebx
|
|
|
|
; Restore the data pointer and the caller's ebp, then consume 16 bytes.
pop esi
|
|
pop ebp
|
|
add esi, 16
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+48
|
|
nf57: ; low 8x4x2 (16 bytes)
; Decodes one block from 16 bytes at [esi]:
;   [esi+0], +2, +4, +6 : four colour words (fetched through Trans16)
;   [esi+8]..[esi+15]   : eight pattern bytes, two per row pair, each
;                         indexing nfhpk_mov4
; Reached from nf41 when bit 15 of the word at [esi+4] is set.
; Each assembled dword is stored to two consecutive rows.
; Self-modifying: byte +2 of each `mov ax, bx` at nf57_11..nf57_48 is
; overwritten from the table before the sequence at nf57_0 runs.  The
; instructions are 16-bit moves in 32-bit code, so byte +2 is the ModRM byte
; after the operand-size prefix and opcode.
; ebp and esi are saved and restored; on return esi has advanced by 16 and
; edi has moved down 7 rows and then back by nfpk_back_right.
|
|
; Disabled debug path: would skip the 16 data bytes and jump to nf_solid with ebx = 0.
if 0 ;debug
|
|
add esi, 16
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
; eax is cleared once so that loading al alone yields a clean table index.
xor eax, eax
|
|
lea ecx, nfhpk_mov4
|
|
; edx -> third byte of the first patchable instruction.
lea edx, byte ptr ds:nf57_11+2
|
|
|
|
; In each pair the first `mov ax` is shifted into the high half of eax and
; the second fills the low half, so the table bytes are applied in the order
; bh, bl (and again after the shift).
; Row pair 1: [esi+8], [esi+9].
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_11-nf57_11)], bh
|
|
mov [edx+(nf57_12-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_13-nf57_11)], bh
|
|
mov [edx+(nf57_14-nf57_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_15-nf57_11)], bh
|
|
mov [edx+(nf57_16-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_17-nf57_11)], bh
|
|
mov [edx+(nf57_18-nf57_11)], bl
|
|
|
|
|
|
; Row pair 2: [esi+10], [esi+11].
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_21-nf57_11)], bh
|
|
mov [edx+(nf57_22-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_23-nf57_11)], bh
|
|
mov [edx+(nf57_24-nf57_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_25-nf57_11)], bh
|
|
mov [edx+(nf57_26-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_27-nf57_11)], bh
|
|
mov [edx+(nf57_28-nf57_11)], bl
|
|
|
|
|
|
; Row pair 3: [esi+12], [esi+13].
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_31-nf57_11)], bh
|
|
mov [edx+(nf57_32-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_33-nf57_11)], bh
|
|
mov [edx+(nf57_34-nf57_11)], bl
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_35-nf57_11)], bh
|
|
mov [edx+(nf57_36-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_37-nf57_11)], bh
|
|
mov [edx+(nf57_38-nf57_11)], bl
|
|
|
|
|
|
; Row pair 4: [esi+14], [esi+15].
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_41-nf57_11)], bh
|
|
mov [edx+(nf57_42-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_43-nf57_11)], bh
|
|
mov [edx+(nf57_44-nf57_11)], bl
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_45-nf57_11)], bh
|
|
mov [edx+(nf57_46-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_47-nf57_11)], bh
|
|
mov [edx+(nf57_48-nf57_11)], bl
|
|
|
|
; ebp and esi are about to be reused for a colour and the row pitch.
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
; NOTE(review): the words at [esi] and [esi+4] have bit 15 set on this path
; (see the tests in nf9 and nf41); the ", 1" argument presumably makes
; Trans16 strip it -- confirm against the macro definition.
|
|
Trans16 bx, esi, 1
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4, 1
|
|
Trans16 bp, esi+6
|
|
; esi now holds the row pitch; the data pointer is on the stack.
mov esi, nf_width
|
|
|
|
; The jump discards already-prefetched copies of the bytes patched above.
jmp nf57_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf57_0:
|
|
|
|
; Each dword is assembled from two patched 16-bit moves (first one shifted
; into the high half) and stored to the same column of two rows.
nf57_11:mov ax, bx
|
|
shl eax, 16
|
|
nf57_12:mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf57_13:mov ax, bx
|
|
shl eax, 16
|
|
nf57_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf57_15:mov ax, bx
|
|
shl eax, 16
|
|
nf57_16:mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf57_17:mov ax, bx
|
|
shl eax, 16
|
|
nf57_18:mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
; Step down two rows.
lea edi, [edi+esi*2]
|
|
|
|
nf57_21:mov ax, bx
|
|
shl eax, 16
|
|
nf57_22:mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf57_23:mov ax, bx
|
|
shl eax, 16
|
|
nf57_24:mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf57_25:mov ax, bx
|
|
shl eax, 16
|
|
nf57_26:mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf57_27:mov ax, bx
|
|
shl eax, 16
|
|
nf57_28:mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf57_31:mov ax, bx
|
|
shl eax, 16
|
|
nf57_32:mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf57_33:mov ax, bx
|
|
shl eax, 16
|
|
nf57_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf57_35:mov ax, bx
|
|
shl eax, 16
|
|
nf57_36:mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf57_37:mov ax, bx
|
|
shl eax, 16
|
|
nf57_38:mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf57_41:mov ax, bx
|
|
shl eax, 16
|
|
nf57_42:mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf57_43:mov ax, bx
|
|
shl eax, 16
|
|
nf57_44:mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf57_45:mov ax, bx
|
|
shl eax, 16
|
|
nf57_46:mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf57_47:mov ax, bx
|
|
shl eax, 16
|
|
nf57_48:mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
; Only one more row here, so edi ends 7 rows below where it started.
add edi, esi
|
|
|
|
; Restore the data pointer and the caller's ebp, then consume 16 bytes.
pop esi
|
|
pop ebp
|
|
add esi, 16
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf10 -- decoder entry for the "2x2 4x4x2" block encoding (48 data bytes).
; esi points at the encoded data for this block. If bit 15 of the first
; 16-bit word is set, the data uses a different layout and control passes
; to nf26. Otherwise the `mov ax, bx` instructions at labels nf10_11..
; are patched in place (self-modifying code) before they are executed.
nf10: ; 2x2 4x4x2 (48 bytes)
|
|
|
|
test word ptr [esi], 08000h ; bit 15 of the first word selects the sub-format
|
|
jnz nf26 ; set -> let nf26 decode this block
|
|
|
|
; Disabled debug path: skip the 48 data bytes and fill the block with ebx = 0.
if 0 ;debug
|
|
add esi, 48
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax ; clear eax so al can serve as a zero-extended table index
|
|
lea ecx, nfhpk_mov4 ; ecx = base of nfhpk_mov4 (4-byte entries; table defined outside this view)
|
|
lea edx, byte ptr ds:nf10_11+2 ; edx = address of byte 2 of the instruction at nf10_11
|
|
|
|
; Each stream byte yields four patch bytes, one per labelled instruction.
; Presumably byte 2 is the ModR/M byte, so the patch chooses which of
; bx/dx/cx/bp is copied into ax -- confirm against the nfhpk_mov4 contents.
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4] ; four patch bytes for this stream byte
|
|
mov [edx+(nf10_11-nf10_11)], bh ; patch instruction at nf10_11
|
|
mov [edx+(nf10_12-nf10_11)], bl ; patch instruction at nf10_12
|
|
shr ebx, 16 ; bring the upper two patch bytes into bh/bl
|
|
mov [edx+(nf10_13-nf10_11)], bh
|
|
mov [edx+(nf10_14-nf10_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_15-nf10_11)], bh
|
|
mov [edx+(nf10_16-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_17-nf10_11)], bh
|
|
mov [edx+(nf10_18-nf10_11)], bl
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_21-nf10_11)], bh
|
|
mov [edx+(nf10_22-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_23-nf10_11)], bh
|
|
mov [edx+(nf10_24-nf10_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_25-nf10_11)], bh
|
|
mov [edx+(nf10_26-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_27-nf10_11)], bh
|
|
mov [edx+(nf10_28-nf10_11)], bl
|
|
|
|
|
|
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_31-nf10_11)], bh
|
|
mov [edx+(nf10_32-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_33-nf10_11)], bh
|
|
mov [edx+(nf10_34-nf10_11)], bl
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_35-nf10_11)], bh
|
|
mov [edx+(nf10_36-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_37-nf10_11)], bh
|
|
mov [edx+(nf10_38-nf10_11)], bl
|
|
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_41-nf10_11)], bh
|
|
mov [edx+(nf10_42-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_43-nf10_11)], bh
|
|
mov [edx+(nf10_44-nf10_11)], bl
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_45-nf10_11)], bh
|
|
mov [edx+(nf10_46-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_47-nf10_11)], bh
|
|
mov [edx+(nf10_48-nf10_11)], bl
|
|
|
|
|
|
lea edx, [edx+(nf10_51-nf10_11)]
|
|
|
|
mov al, [esi+32]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_51-nf10_51)], bh
|
|
mov [edx+(nf10_52-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_53-nf10_51)], bh
|
|
mov [edx+(nf10_54-nf10_51)], bl
|
|
|
|
mov al, [esi+33]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_55-nf10_51)], bh
|
|
mov [edx+(nf10_56-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_57-nf10_51)], bh
|
|
mov [edx+(nf10_58-nf10_51)], bl
|
|
|
|
|
|
mov al, [esi+34]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_61-nf10_51)], bh
|
|
mov [edx+(nf10_62-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_63-nf10_51)], bh
|
|
mov [edx+(nf10_64-nf10_51)], bl
|
|
|
|
mov al, [esi+35]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_65-nf10_51)], bh
|
|
mov [edx+(nf10_66-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_67-nf10_51)], bh
|
|
mov [edx+(nf10_68-nf10_51)], bl
|
|
|
|
|
|
mov al, [esi+44]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_71-nf10_51)], bh
|
|
mov [edx+(nf10_72-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_73-nf10_51)], bh
|
|
mov [edx+(nf10_74-nf10_51)], bl
|
|
|
|
mov al, [esi+45]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_75-nf10_51)], bh
|
|
mov [edx+(nf10_76-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_77-nf10_51)], bh
|
|
mov [edx+(nf10_78-nf10_51)], bl
|
|
|
|
|
|
mov al, [esi+46]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_81-nf10_51)], bh
|
|
mov [edx+(nf10_82-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_83-nf10_51)], bh
|
|
mov [edx+(nf10_84-nf10_51)], bl
|
|
|
|
mov al, [esi+47]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_85-nf10_51)], bh
|
|
mov [edx+(nf10_86-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_87-nf10_51)], bh
|
|
mov [edx+(nf10_88-nf10_51)], bl
|
|
|
|
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
|
|
Trans16 bx, esi
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4
|
|
Trans16 bp, esi+6
|
|
mov esi, nf_width
|
|
|
|
jmp nf10_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf10_0:
|
|
|
|
nf10_11:mov ax, bx
|
|
shl eax, 16
|
|
nf10_12:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_13:mov ax, bx
|
|
shl eax, 16
|
|
nf10_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_15:mov ax, bx
|
|
shl eax, 16
|
|
nf10_16:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_17:mov ax, bx
|
|
shl eax, 16
|
|
nf10_18:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_21:mov ax, bx
|
|
shl eax, 16
|
|
nf10_22:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_23:mov ax, bx
|
|
shl eax, 16
|
|
nf10_24:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_25:mov ax, bx
|
|
shl eax, 16
|
|
nf10_26:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_27:mov ax, bx
|
|
shl eax, 16
|
|
nf10_28:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; Load bx,dx,cx,bp with four colors
|
|
mov esi, [esp]
|
|
Trans16 bx, esi+12
|
|
Trans16 dx, esi+14
|
|
Trans16 cx, esi+16
|
|
Trans16 bp, esi+18
|
|
mov esi, nf_width
|
|
|
|
nf10_31:mov ax, bx
|
|
shl eax, 16
|
|
nf10_32:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_33:mov ax, bx
|
|
shl eax, 16
|
|
nf10_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_35:mov ax, bx
|
|
shl eax, 16
|
|
nf10_36:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_37:mov ax, bx
|
|
shl eax, 16
|
|
nf10_38:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_41:mov ax, bx
|
|
shl eax, 16
|
|
nf10_42:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_43:mov ax, bx
|
|
shl eax, 16
|
|
nf10_44:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_45:mov ax, bx
|
|
shl eax, 16
|
|
nf10_46:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_47:mov ax, bx
|
|
shl eax, 16
|
|
nf10_48:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
lea eax, [esi*8-8]
|
|
sub edi, eax
|
|
|
|
; Load bx,dx,cx,bp with four colors
|
|
mov esi, [esp]
|
|
Trans16 bx, esi+24
|
|
Trans16 dx, esi+26
|
|
Trans16 cx, esi+28
|
|
Trans16 bp, esi+30
|
|
mov esi, nf_width
|
|
|
|
nf10_51:mov ax, bx
|
|
shl eax, 16
|
|
nf10_52:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_53:mov ax, bx
|
|
shl eax, 16
|
|
nf10_54:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_55:mov ax, bx
|
|
shl eax, 16
|
|
nf10_56:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_57:mov ax, bx
|
|
shl eax, 16
|
|
nf10_58:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_61:mov ax, bx
|
|
shl eax, 16
|
|
nf10_62:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_63:mov ax, bx
|
|
shl eax, 16
|
|
nf10_64:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_65:mov ax, bx
|
|
shl eax, 16
|
|
nf10_66:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_67:mov ax, bx
|
|
shl eax, 16
|
|
nf10_68:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; Load bx,dx,cx,bp with four colors
|
|
mov esi, [esp]
|
|
Trans16 bx, esi+36
|
|
Trans16 dx, esi+38
|
|
Trans16 cx, esi+40
|
|
Trans16 bp, esi+42
|
|
mov esi, nf_width
|
|
|
|
nf10_71:mov ax, bx
|
|
shl eax, 16
|
|
nf10_72:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_73:mov ax, bx
|
|
shl eax, 16
|
|
nf10_74:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_75:mov ax, bx
|
|
shl eax, 16
|
|
nf10_76:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_77:mov ax, bx
|
|
shl eax, 16
|
|
nf10_78:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_81:mov ax, bx
|
|
shl eax, 16
|
|
nf10_82:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_83:mov ax, bx
|
|
shl eax, 16
|
|
nf10_84:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_85:mov ax, bx
|
|
shl eax, 16
|
|
nf10_86:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_87:mov ax, bx
|
|
shl eax, 16
|
|
nf10_88:mov ax, bx
|
|
mov [edi+4], eax
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 48
|
|
sub edi, 8
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+16
|
|
; nf26 -- decoder entry for the "2x1 4x8x2" block encoding (32 data bytes).
; Reached from nf10 when bit 15 of the word at [esi] is set. If bit 15 of
; the word at [esi+16] is set as well, control passes on to nf42.
; Otherwise the `mov ax, bx` instructions at labels nf26_11.. are patched
; in place (self-modifying code) before they are executed.
nf26: ; 2x1 4x8x2 (32 bytes)
|
|
|
|
test word ptr [esi+16], 08000h ; bit 15 of the word at +16 selects the sub-format
|
|
jnz nf42 ; set -> let nf42 decode this block
|
|
|
|
; Disabled debug path: skip the 32 data bytes and fill the block with ebx = 0.
if 0 ;debug
|
|
add esi, 32
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax ; clear eax so al can serve as a zero-extended table index
|
|
lea ecx, nfhpk_mov4 ; ecx = base of nfhpk_mov4 (4-byte entries; table defined outside this view)
|
|
lea edx, byte ptr ds:nf26_11+2 ; edx = address of byte 2 of the instruction at nf26_11
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_11-nf26_11)], bh
|
|
mov [edx+(nf26_12-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_13-nf26_11)], bh
|
|
mov [edx+(nf26_14-nf26_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_15-nf26_11)], bh
|
|
mov [edx+(nf26_16-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_17-nf26_11)], bh
|
|
mov [edx+(nf26_18-nf26_11)], bl
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_21-nf26_11)], bh
|
|
mov [edx+(nf26_22-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_23-nf26_11)], bh
|
|
mov [edx+(nf26_24-nf26_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_25-nf26_11)], bh
|
|
mov [edx+(nf26_26-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_27-nf26_11)], bh
|
|
mov [edx+(nf26_28-nf26_11)], bl
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_31-nf26_11)], bh
|
|
mov [edx+(nf26_32-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_33-nf26_11)], bh
|
|
mov [edx+(nf26_34-nf26_11)], bl
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_35-nf26_11)], bh
|
|
mov [edx+(nf26_36-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_37-nf26_11)], bh
|
|
mov [edx+(nf26_38-nf26_11)], bl
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_41-nf26_11)], bh
|
|
mov [edx+(nf26_42-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_43-nf26_11)], bh
|
|
mov [edx+(nf26_44-nf26_11)], bl
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_45-nf26_11)], bh
|
|
mov [edx+(nf26_46-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_47-nf26_11)], bh
|
|
mov [edx+(nf26_48-nf26_11)], bl
|
|
|
|
|
|
lea edx, [edx+(nf26_51-nf26_11)]
|
|
|
|
mov al, [esi+24]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_51-nf26_51)], bh
|
|
mov [edx+(nf26_52-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_53-nf26_51)], bh
|
|
mov [edx+(nf26_54-nf26_51)], bl
|
|
|
|
mov al, [esi+25]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_55-nf26_51)], bh
|
|
mov [edx+(nf26_56-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_57-nf26_51)], bh
|
|
mov [edx+(nf26_58-nf26_51)], bl
|
|
|
|
|
|
mov al, [esi+26]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_61-nf26_51)], bh
|
|
mov [edx+(nf26_62-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_63-nf26_51)], bh
|
|
mov [edx+(nf26_64-nf26_51)], bl
|
|
|
|
mov al, [esi+27]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_65-nf26_51)], bh
|
|
mov [edx+(nf26_66-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_67-nf26_51)], bh
|
|
mov [edx+(nf26_68-nf26_51)], bl
|
|
|
|
|
|
mov al, [esi+28]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_71-nf26_51)], bh
|
|
mov [edx+(nf26_72-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_73-nf26_51)], bh
|
|
mov [edx+(nf26_74-nf26_51)], bl
|
|
|
|
mov al, [esi+29]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_75-nf26_51)], bh
|
|
mov [edx+(nf26_76-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_77-nf26_51)], bh
|
|
mov [edx+(nf26_78-nf26_51)], bl
|
|
|
|
|
|
mov al, [esi+30]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_81-nf26_51)], bh
|
|
mov [edx+(nf26_82-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_83-nf26_51)], bh
|
|
mov [edx+(nf26_84-nf26_51)], bl
|
|
|
|
mov al, [esi+31]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_85-nf26_51)], bh
|
|
mov [edx+(nf26_86-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_87-nf26_51)], bh
|
|
mov [edx+(nf26_88-nf26_51)], bl
|
|
|
|
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
|
|
Trans16 bx, esi, 1
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4
|
|
Trans16 bp, esi+6
|
|
mov esi, nf_width
|
|
|
|
jmp nf26_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf26_0:
|
|
|
|
nf26_11:mov ax, bx
|
|
shl eax, 16
|
|
nf26_12:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_13:mov ax, bx
|
|
shl eax, 16
|
|
nf26_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_15:mov ax, bx
|
|
shl eax, 16
|
|
nf26_16:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_17:mov ax, bx
|
|
shl eax, 16
|
|
nf26_18:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_21:mov ax, bx
|
|
shl eax, 16
|
|
nf26_22:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_23:mov ax, bx
|
|
shl eax, 16
|
|
nf26_24:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_25:mov ax, bx
|
|
shl eax, 16
|
|
nf26_26:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_27:mov ax, bx
|
|
shl eax, 16
|
|
nf26_28:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_31:mov ax, bx
|
|
shl eax, 16
|
|
nf26_32:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_33:mov ax, bx
|
|
shl eax, 16
|
|
nf26_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_35:mov ax, bx
|
|
shl eax, 16
|
|
nf26_36:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_37:mov ax, bx
|
|
shl eax, 16
|
|
nf26_38:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_41:mov ax, bx
|
|
shl eax, 16
|
|
nf26_42:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_43:mov ax, bx
|
|
shl eax, 16
|
|
nf26_44:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_45:mov ax, bx
|
|
shl eax, 16
|
|
nf26_46:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_47:mov ax, bx
|
|
shl eax, 16
|
|
nf26_48:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
lea eax, [esi*8-8]
|
|
sub edi, eax
|
|
|
|
; Load bx,dx,cx,bp with four colors
|
|
mov esi, [esp]
|
|
Trans16 bx, esi+16
|
|
Trans16 dx, esi+18
|
|
Trans16 cx, esi+20
|
|
Trans16 bp, esi+22
|
|
mov esi, nf_width
|
|
|
|
nf26_51:mov ax, bx
|
|
shl eax, 16
|
|
nf26_52:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_53:mov ax, bx
|
|
shl eax, 16
|
|
nf26_54:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_55:mov ax, bx
|
|
shl eax, 16
|
|
nf26_56:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_57:mov ax, bx
|
|
shl eax, 16
|
|
nf26_58:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_61:mov ax, bx
|
|
shl eax, 16
|
|
nf26_62:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_63:mov ax, bx
|
|
shl eax, 16
|
|
nf26_64:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_65:mov ax, bx
|
|
shl eax, 16
|
|
nf26_66:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_67:mov ax, bx
|
|
shl eax, 16
|
|
nf26_68:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_71:mov ax, bx
|
|
shl eax, 16
|
|
nf26_72:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_73:mov ax, bx
|
|
shl eax, 16
|
|
nf26_74:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_75:mov ax, bx
|
|
shl eax, 16
|
|
nf26_76:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_77:mov ax, bx
|
|
shl eax, 16
|
|
nf26_78:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_81:mov ax, bx
|
|
shl eax, 16
|
|
nf26_82:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_83:mov ax, bx
|
|
shl eax, 16
|
|
nf26_84:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_85:mov ax, bx
|
|
shl eax, 16
|
|
nf26_86:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_87:mov ax, bx
|
|
shl eax, 16
|
|
nf26_88:mov ax, bx
|
|
mov [edi+4], eax
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 32
|
|
sub edi, 8
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+32
|
|
; nf42 -- decoder entry for the "1x2 8x4x2" block encoding (32 data bytes).
; Reached from nf26 when bit 15 of the word at [esi+16] is set.
; The `mov ax, bx` instructions at labels nf42_11.. are patched in place
; (self-modifying code) before they are executed.
nf42: ; 1x2 8x4x2 (32 bytes)
|
|
|
|
; Disabled debug path: skip the 32 data bytes and fill the block with ebx = 0.
if 0 ;debug
|
|
add esi, 32
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax ; clear eax so al can serve as a zero-extended table index
|
|
lea ecx, nfhpk_mov4 ; ecx = base of nfhpk_mov4 (4-byte entries; table defined outside this view)
|
|
lea edx, byte ptr ds:nf42_11+2 ; edx = address of byte 2 of the instruction at nf42_11
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_11-nf42_11)], bh
|
|
mov [edx+(nf42_12-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_13-nf42_11)], bh
|
|
mov [edx+(nf42_14-nf42_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_15-nf42_11)], bh
|
|
mov [edx+(nf42_16-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_17-nf42_11)], bh
|
|
mov [edx+(nf42_18-nf42_11)], bl
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_21-nf42_11)], bh
|
|
mov [edx+(nf42_22-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_23-nf42_11)], bh
|
|
mov [edx+(nf42_24-nf42_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_25-nf42_11)], bh
|
|
mov [edx+(nf42_26-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_27-nf42_11)], bh
|
|
mov [edx+(nf42_28-nf42_11)], bl
|
|
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_31-nf42_11)], bh
|
|
mov [edx+(nf42_32-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_33-nf42_11)], bh
|
|
mov [edx+(nf42_34-nf42_11)], bl
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_35-nf42_11)], bh
|
|
mov [edx+(nf42_36-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_37-nf42_11)], bh
|
|
mov [edx+(nf42_38-nf42_11)], bl
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_41-nf42_11)], bh
|
|
mov [edx+(nf42_42-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_43-nf42_11)], bh
|
|
mov [edx+(nf42_44-nf42_11)], bl
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_45-nf42_11)], bh
|
|
mov [edx+(nf42_46-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_47-nf42_11)], bh
|
|
mov [edx+(nf42_48-nf42_11)], bl
|
|
|
|
|
|
lea edx, [edx+(nf42_51-nf42_11)]
|
|
|
|
mov al, [esi+24]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_51-nf42_51)], bh
|
|
mov [edx+(nf42_52-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_53-nf42_51)], bh
|
|
mov [edx+(nf42_54-nf42_51)], bl
|
|
|
|
mov al, [esi+25]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_55-nf42_51)], bh
|
|
mov [edx+(nf42_56-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_57-nf42_51)], bh
|
|
mov [edx+(nf42_58-nf42_51)], bl
|
|
|
|
|
|
mov al, [esi+26]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_61-nf42_51)], bh
|
|
mov [edx+(nf42_62-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_63-nf42_51)], bh
|
|
mov [edx+(nf42_64-nf42_51)], bl
|
|
|
|
mov al, [esi+27]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_65-nf42_51)], bh
|
|
mov [edx+(nf42_66-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_67-nf42_51)], bh
|
|
mov [edx+(nf42_68-nf42_51)], bl
|
|
|
|
|
|
mov al, [esi+28]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_71-nf42_51)], bh
|
|
mov [edx+(nf42_72-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_73-nf42_51)], bh
|
|
mov [edx+(nf42_74-nf42_51)], bl
|
|
|
|
mov al, [esi+29]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_75-nf42_51)], bh
|
|
mov [edx+(nf42_76-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_77-nf42_51)], bh
|
|
mov [edx+(nf42_78-nf42_51)], bl
|
|
|
|
|
|
mov al, [esi+30]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_81-nf42_51)], bh
|
|
mov [edx+(nf42_82-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_83-nf42_51)], bh
|
|
mov [edx+(nf42_84-nf42_51)], bl
|
|
|
|
mov al, [esi+31]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_85-nf42_51)], bh
|
|
mov [edx+(nf42_86-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_87-nf42_51)], bh
|
|
mov [edx+(nf42_88-nf42_51)], bl
|
|
|
|
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
|
|
Trans16 bx, esi, 1
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4
|
|
Trans16 bp, esi+6
|
|
mov esi, nf_width
|
|
|
|
jmp nf42_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf42_0:
|
|
|
|
nf42_11:mov ax, bx
|
|
shl eax, 16
|
|
nf42_12:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_13:mov ax, bx
|
|
shl eax, 16
|
|
nf42_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_15:mov ax, bx
|
|
shl eax, 16
|
|
nf42_16:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_17:mov ax, bx
|
|
shl eax, 16
|
|
nf42_18:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf42_21:mov ax, bx
|
|
shl eax, 16
|
|
nf42_22:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_23:mov ax, bx
|
|
shl eax, 16
|
|
nf42_24:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_25:mov ax, bx
|
|
shl eax, 16
|
|
nf42_26:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_27:mov ax, bx
|
|
shl eax, 16
|
|
nf42_28:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf42_31:mov ax, bx
|
|
shl eax, 16
|
|
nf42_32:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_33:mov ax, bx
|
|
shl eax, 16
|
|
nf42_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_35:mov ax, bx
|
|
shl eax, 16
|
|
nf42_36:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_37:mov ax, bx
|
|
shl eax, 16
|
|
nf42_38:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf42_41:mov ax, bx
|
|
shl eax, 16
|
|
nf42_42:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_43:mov ax, bx
|
|
shl eax, 16
|
|
nf42_44:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_45:mov ax, bx
|
|
shl eax, 16
|
|
nf42_46:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_47:mov ax, bx
|
|
shl eax, 16
|
|
nf42_48:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
; Load bx,dx,cx,bp with four colors
|
|
mov esi, [esp]
|
|
Trans16 bx, esi+16, 1
|
|
Trans16 dx, esi+18
|
|
Trans16 cx, esi+20
|
|
Trans16 bp, esi+22
|
|
mov esi, nf_width
|
|
|
|
nf42_51:mov ax, bx
|
|
shl eax, 16
|
|
nf42_52:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_53:mov ax, bx
|
|
shl eax, 16
|
|
nf42_54:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_55:mov ax, bx
|
|
shl eax, 16
|
|
nf42_56:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_57:mov ax, bx
|
|
shl eax, 16
|
|
nf42_58:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf42_61:mov ax, bx
|
|
shl eax, 16
|
|
nf42_62:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_63:mov ax, bx
|
|
shl eax, 16
|
|
nf42_64:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_65:mov ax, bx
|
|
shl eax, 16
|
|
nf42_66:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_67:mov ax, bx
|
|
shl eax, 16
|
|
nf42_68:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf42_71:mov ax, bx
|
|
shl eax, 16
|
|
nf42_72:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_73:mov ax, bx
|
|
shl eax, 16
|
|
nf42_74:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_75:mov ax, bx
|
|
shl eax, 16
|
|
nf42_76:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_77:mov ax, bx
|
|
shl eax, 16
|
|
nf42_78:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf42_81:mov ax, bx
|
|
shl eax, 16
|
|
nf42_82:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_83:mov ax, bx
|
|
shl eax, 16
|
|
nf42_84:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_85:mov ax, bx
|
|
shl eax, 16
|
|
nf42_86:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_87:mov ax, bx
|
|
shl eax, 16
|
|
nf42_88:mov ax, bx
|
|
mov [edi+12], eax
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 32
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf11 -- "8x8x16": every pixel of the 8x8 block has its own 16-bit value.
; Consumes 128 bytes at [esi] (8 rows of 8 words) and writes 8 rows of
; 8 words starting at [edi], advancing edi by nf_width bytes between rows.
; Trans16 is a macro defined outside this view; here it is used to load a
; value derived from the word at the given address into bx (its scratch
; register usage is not visible from here).
nf11: ; 8x8x16 (128 bytes)
|
|
; Disabled debug path: skip the 128 data bytes and fill the block with ebx = 0.
if 0 ;debug
|
|
add esi, 128
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
mov edx, nf_width ; edx = row stride in bytes
|
|
|
|
|
|
|
|
; Trans16Blk idx: convert the 8 words at [idx]..[idx+14] and store them
; as one row of 8 words at [edi]..[edi+14]. Uses bx as the transfer register.
Trans16Blk MACRO idx
|
|
Trans16 bx, idx
|
|
mov [edi], bx
|
|
Trans16 bx, idx+2
|
|
mov [edi+2], bx
|
|
Trans16 bx, idx+4
|
|
mov [edi+4], bx
|
|
Trans16 bx, idx+6
|
|
mov [edi+6], bx
|
|
Trans16 bx, idx+8
|
|
mov [edi+8], bx
|
|
Trans16 bx, idx+10
|
|
mov [edi+10], bx
|
|
Trans16 bx, idx+12
|
|
mov [edi+12], bx
|
|
Trans16 bx, idx+14
|
|
mov [edi+14], bx
|
|
ENDM
|
|
|
|
|
|
; Rows 0..7: 16 source bytes per row; edi steps down after all but the last.
Trans16Blk esi ;0
|
|
add edi, edx
|
|
Trans16Blk esi+16 ;1
|
|
add edi, edx
|
|
Trans16Blk esi+32 ;2
|
|
add edi, edx
|
|
Trans16Blk esi+48 ;3
|
|
add edi, edx
|
|
Trans16Blk esi+64 ;4
|
|
add edi, edx
|
|
Trans16Blk esi+80 ;5
|
|
add edi, edx
|
|
Trans16Blk esi+96 ;6
|
|
add edi, edx
|
|
Trans16Blk esi+112 ;7
|
|
|
|
|
|
add esi, 128 ; step past the 128 data bytes consumed
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf12 -- "low 4x4x16": 16 values, each painted as a 2x2 pixel cell.
; Consumes 32 bytes at [esi]. For every value, Trans16 (macro defined
; outside this view) loads it into bx; shrd + mov then duplicate bx into
; both halves of eax, and that dword is stored at the same offset on two
; consecutive rows ([edi+k] and [edi+edx+k]). Four values fill one pair of
; rows; edi then moves down two rows.
nf12: ; low 4x4x16 (32 bytes)
|
|
; Disabled debug path: skip the 32 data bytes and fill the block with ebx = 0.
if 0 ;debug
|
|
add esi, 32
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
mov edx, nf_width ; edx = row stride in bytes
|
|
|
|
|
|
; --- rows 0-1: values at esi+0 .. esi+6 ---
Trans16 bx, esi
|
|
shrd eax, ebx, 16 ; upper half of eax <- bx
|
|
mov ax, bx ; lower half of eax <- bx, so eax = bx:bx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax ; same two pixels on the row below
|
|
|
|
|
|
Trans16 bx, esi+2
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
|
|
|
|
Trans16 bx, esi+4
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
|
|
|
|
Trans16 bx, esi+6
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
|
|
|
|
lea edi, [edi+edx*2] ; down two rows
|
|
|
|
|
|
; --- rows 2-3: values at esi+8 .. esi+14 ---
Trans16 bx, esi+8
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
|
|
Trans16 bx, esi+10
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
|
|
|
|
Trans16 bx, esi+12
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
|
|
|
|
Trans16 bx, esi+14
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
|
|
|
|
lea edi, [edi+edx*2] ; down two rows
|
|
|
|
|
|
; --- rows 4-5: values at esi+16 .. esi+22 ---
Trans16 bx, esi+16
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
|
|
Trans16 bx, esi+18
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
|
|
|
|
Trans16 bx, esi+20
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
|
|
|
|
Trans16 bx, esi+22
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
|
|
|
|
lea edi, [edi+edx*2] ; down two rows
|
|
|
|
|
|
; --- rows 6-7: values at esi+24 .. esi+30 ---
Trans16 bx, esi+24
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
|
|
Trans16 bx, esi+26
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
|
|
|
|
Trans16 bx, esi+28
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
|
|
|
|
Trans16 bx, esi+30
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
|
|
|
|
add edi, edx ; edi now addresses the last (8th) row of the block
|
|
|
|
|
|
sub edi, nfpk_back_right ; same end-of-block adjustment the other handlers apply
|
|
add esi, 32 ; step past the 32 data bytes consumed
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf13 -- "2x2 4x4x0": four values, one per quadrant of the 8x8 block.
; Consumes 8 bytes at [esi]. The values at esi+0 / esi+2 fill the left /
; right 8 bytes of rows 0-3; the values at esi+4 / esi+6 do the same for
; rows 4-7. Trans16 is a macro defined outside this view; here it loads a
; value derived from the word at the given address into cx.
nf13: ; 2x2 4x4x0 (8 bytes)
|
|
; Disabled debug path: skip the 8 data bytes and fill the block with ebx = 0.
if 0 ;debug
|
|
add esi, 8
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
mov edx, nf_width ; edx = row stride in bytes
|
|
|
|
|
|
; ebx = first value duplicated into both 16-bit halves (left quadrant)
Trans16 cx, esi
|
|
shrd ebx, ecx, 16
|
|
mov bx, cx
|
|
|
|
|
|
; ecx = second value duplicated into both 16-bit halves (right quadrant);
; built in eax because cx is also the Trans16 destination
Trans16 cx, esi+2
|
|
shrd eax, ecx, 16
|
|
mov ax, cx
|
|
mov ecx, eax
|
|
|
|
|
|
; rows 0-1
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
mov [edi+8], ecx
|
|
mov [edi+12], ecx
|
|
mov [edi+edx], ebx
|
|
mov [edi+edx+4], ebx
|
|
mov [edi+edx+8], ecx
|
|
mov [edi+edx+12], ecx
|
|
lea edi, [edi+edx*2]
|
|
; rows 2-3
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
mov [edi+8], ecx
|
|
mov [edi+12], ecx
|
|
mov [edi+edx], ebx
|
|
mov [edi+edx+4], ebx
|
|
mov [edi+edx+8], ecx
|
|
mov [edi+edx+12], ecx
|
|
lea edi, [edi+edx*2]
|
|
|
|
|
|
; ebx = third value duplicated (lower-left quadrant)
Trans16 cx, esi+4
|
|
shrd ebx, ecx, 16
|
|
mov bx, cx
|
|
|
|
|
|
; ecx = fourth value duplicated (lower-right quadrant)
Trans16 cx, esi+6
|
|
shrd eax, ecx, 16
|
|
mov ax, cx
|
|
mov ecx, eax
|
|
|
|
|
|
; rows 4-5
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
mov [edi+8], ecx
|
|
mov [edi+12], ecx
|
|
mov [edi+edx], ebx
|
|
mov [edi+edx+4], ebx
|
|
mov [edi+edx+8], ecx
|
|
mov [edi+edx+12], ecx
|
|
lea edi, [edi+edx*2]
|
|
; rows 6-7
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
mov [edi+8], ecx
|
|
mov [edi+12], ecx
|
|
mov [edi+edx], ebx
|
|
mov [edi+edx+4], ebx
|
|
mov [edi+edx+8], ecx
|
|
mov [edi+edx+12], ecx
|
|
add edi, edx ; edi now addresses the last (8th) row of the block
|
|
|
|
|
|
sub edi, nfpk_back_right ; same end-of-block adjustment the other handlers apply
|
|
add esi, 8 ; step past the 8 data bytes consumed
|
|
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf14: ; 8x8x0 (2 bytes)
	; The whole block is a single value: fetch it through Trans16 (macro
	; defined elsewhere), step past its two data bytes, then replicate the
	; 16-bit result into both halves of ebx and fall into nf_solid.
	Trans16 cx, esi
	add esi, 2
	shrd ebx, ecx, 16		; upper half of ebx <- cx
	mov bx, cx			; lower half of ebx <- cx

; nf_solid -- fill an 8-row block with the dword in ebx, 16 bytes per row.
; In:  ebx = fill pattern, edi -> first row of the block
; Out: edi adjusted by nfpk_back_right after the last row; edx = nf_width
nf_solid:
	mov edx, nf_width		; row stride in bytes

	REPT SHEIGHT-1			; all rows but the last: store, then step down
	mov [edi], ebx
	mov [edi+4], ebx
	mov [edi+8], ebx
	mov [edi+12], ebx
	add edi, edx
	ENDM

	mov [edi], ebx			; last row: edi stays on this row
	mov [edi+4], ebx
	mov [edi+8], ebx
	mov [edi+12], ebx

	sub edi, nfpk_back_right	; (SHEIGHT-1)*width+8

	retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf15 -- placeholder handler: returns immediately without reading the
; data stream (esi) or writing to the destination (edi).
nf15: ; unused
|
|
retn
|
|
|
|
nfHPkDecomp ENDP
|
|
|
|
; Normal version
|
|
;
|
|
; nfPkDecomp -- entry point and opcode dispatch loop.
;   ops  -> opcode stream; each byte holds two 4-bit opcodes
;   comp -> encoded block data, loaded into esi for the opcode handlers
;   w    =  opcodes per row (halved to get the number of opcode bytes)
;   h    =  number of rows; tested after decrementing, so the row loop
;           body always runs at least once
;   x, y are not referenced in the code visible here (presumably consumed
;   by NF_DECOMP_INIT -- confirm).
; tbuf, new_row and DiffBufPtrs are read below but never assigned in the
; visible code; presumably NF_DECOMP_INIT sets them up -- confirm.
nfPkDecomp PROC USES ESI EDI EBX, \
|
|
ops:PTRBYTE, comp:PTRBYTE, \
|
|
x:DWORD, y:DWORD, w:DWORD, h:DWORD
|
|
LOCAL tbuf: PTRBYTE
|
|
LOCAL new_row:DWORD
|
|
LOCAL DiffBufPtrs:DWORD
|
|
|
|
|
|
LOCAL nfpk_back_right: DWORD
|
|
LOCAL wcnt:DWORD
|
|
|
|
|
|
LOG_LABEL "StartPkDecomp"
|
|
|
|
|
|
; Handler table indexed by 4-bit opcode; placed in .data, after which
; assembly switches back to .code.
.data
|
|
nfpk_OpTbl label dword
|
|
dword offset nf0 ; Prev Same (0)
|
|
dword offset nf1 ; No change (and copied to screen) (0)
|
|
dword offset nf2 ; Near shift from older part of current buf (1)
|
|
dword offset nf3 ; Near shift from newer part of current buf (1)
|
|
dword offset nf4 ; Near shift from previous buffer (1)
|
|
dword offset nf5 ; Far shift from previous buffer (2)
|
|
dword offset nf6 ; Far shift from current buffer (2)
|
|
; [Or if COMPOPS, run of no changes (0)]
|
|
dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
|
|
dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
|
|
dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
|
|
; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
|
|
dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 8x4x2 (24 bytes)
|
|
dword offset nf11 ; 8x8x8 (64 bytes)
|
|
dword offset nf12 ; low 4x4x8 (16 bytes)
|
|
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
|
|
dword offset nf14 ; 8x8x0 (1 byte)
|
|
dword offset nf15 ; mix 8x8x0 (2 bytes)
|
|
.code
|
|
|
|
|
|
NF_DECOMP_INIT 0
|
|
|
|
|
|
; nfpk_back_right = nf_back_right - SWIDTH; handlers subtract it from edi
; when they finish a block.
mov eax, nf_back_right
|
|
sub eax, SWIDTH
|
|
mov nfpk_back_right, eax
|
|
|
|
|
|
mov esi, comp ; esi -> encoded block data
|
|
mov edi, tbuf ; edi -> destination
|
|
nf_StartRow:
|
|
mov eax, w
|
|
shr eax, 1 ; two opcodes per opcode byte
|
|
mov wcnt,eax
|
|
ALIGN 4
|
|
nf_NextPair:
|
|
dec wcnt
|
|
js nf_NextRow ; all opcode bytes of this row consumed
|
|
mov ebx, ops
|
|
mov al, [ebx] ; fetch next opcode byte ...
|
|
inc ebx
|
|
mov ops, ebx ; ... and advance the stored stream pointer
|
|
|
|
|
|
xor ebx, ebx
|
|
mov bl, al
|
|
shr bl, 4 ; ebx = high nibble (second opcode)
|
|
and eax, 0Fh ; eax = low nibble (first opcode)
|
|
; Chain the two handlers through the stack: the low-nibble handler runs
; first, its retn enters the high-nibble handler, whose retn comes back
; to nf_NextPair.
push offset nf_NextPair
|
|
push nfpk_OpTbl[ebx*4]
|
|
jmp nfpk_OpTbl[eax*4]
|
|
|
|
|
|
nf_NextRow:
|
|
add edi, new_row ; move the destination pointer on to the next row
|
|
dec h
|
|
jnz nf_StartRow
|
|
LOG_LABEL "EndPkDecomp"
|
|
|
|
|
|
ret
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
|
|
; nf0 -- loads DiffBufPtrs into eax and continues at nf_shift (defined
; outside this view), which presumably treats eax as the offset to copy
; from -- confirm against nf_shift.
nf0: ; No change from previous buffer
|
|
mov eax, DiffBufPtrs
|
|
jmp nf_shift
|
|
|
|
; \ Diagonal blend
|
|
;0 1
|
|
; 00010101 1
|
|
; 00001313 2
|
|
; 20303101 3
|
|
; 02030313 4
|
|
; 23203031 5
|
|
; 02020333 6
|
|
; 23232333 7
|
|
;2 22023233 8
|
|
;
|
|
; nf0_d -- writes an 8-row by 8-byte pattern mixing four byte values.
; The digits in the per-row comments name the source registers:
;   0 = bl, 1 = bh, 2 = cl, 3 = ch
; Each row is assembled in eax and stored as two dwords ([edi], [edi+4]).
; edx is used as the row stride and edi as the destination; neither is
; loaded here, so the caller must provide them. Several rows reuse the
; bytes left in eax by the preceding store, so statement order matters.
nf0_d:
|
|
; 3412 (low to high)
|
|
;------
|
|
mov al, bl ; 0001 (1)
|
|
mov ah, bh
|
|
shl eax, 16
|
|
mov al, bl
|
|
mov ah, bl
|
|
mov [edi], eax
|
|
mov ah, bh ; 0101
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
|
|
mov ah, bl ; 0000 (2)
|
|
rol eax, 16
|
|
mov ah, bl
|
|
mov [edi], eax
|
|
mov al, bh ; 1313
|
|
mov ah, ch
|
|
shl eax, 16
|
|
mov al, bh
|
|
mov ah, ch
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
|
|
mov al, ch ; 2030 (3)
|
|
mov ah, bl
|
|
shl eax, 16
|
|
mov al, cl
|
|
mov ah, bl
|
|
mov [edi], eax
|
|
mov al, bl ; 3101
|
|
mov ah, bh
|
|
shl eax, 16
|
|
mov al, ch
|
|
mov ah, bh
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
|
|
mov al, bl ; 0203 (4)
|
|
mov ah, ch
|
|
shl eax, 16
|
|
mov al, bl
|
|
mov ah, cl
|
|
mov [edi], eax
|
|
mov al, bh ; 0313
|
|
mov ah, ch
|
|
rol eax, 16
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
|
|
mov al, cl ; 2320 (5)
|
|
mov ah, bl
|
|
shl eax, 16
|
|
mov al, cl
|
|
mov ah, ch
|
|
mov [edi], eax
|
|
mov al, ch ; 3031
|
|
mov ah, bh
|
|
shl eax, 16
|
|
mov al, ch
|
|
mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
|
|
mov al, bl ; 0202 (6)
|
|
mov ah, cl
|
|
shl eax, 16
|
|
mov al, bl
|
|
mov ah, cl
|
|
mov [edi], eax
|
|
; NOTE(review): as written, the dword stored by the next five instructions
; is ch,ch,bl,ch (3,3,0,3 low to high), not 0,3,3,3 as the comment says --
; confirm whether the code or the comment reflects the intended pattern.
mov ah, ch ; 0333
|
|
shl eax, 16
|
|
mov al, ch
|
|
mov ah, ch
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
|
|
mov al, cl ; 2323 (7)
|
|
rol eax, 16
|
|
mov al, cl
|
|
mov [edi], eax
|
|
mov al, ch ; 2333
|
|
rol eax, 16
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
|
|
mov al, bl ; 2202 (8)
|
|
mov ah, cl
|
|
shl eax, 16
|
|
mov al, cl
|
|
mov ah, cl
|
|
mov [edi], eax
|
|
mov al, ch ; 3233
|
|
mov ah, ch
|
|
shl eax, 16
|
|
mov al, ch
|
|
mov ah, cl
|
|
mov [edi+4], eax
|
|
|
|
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
retn
|
|
|
|
|
|
; / RDiagonal blend
;
; Writes an 8x8 block dithered between four colours.  Each digit in the
; diagram names the register that supplies that pixel:
;       0 = bl    1 = bh    2 = cl    3 = ch
;
;0        1
; 01010111  1
; 20201111  2
; 01021313  3
; 20212131  4
; 02121323  5
; 22213131  6
; 22232323  7
;2 22323133 8
;
; In:   edi            = first byte of the destination block
;       edx            = amount added to edi to step down one row
;       bl, bh, cl, ch = the four colour bytes
; Out:  edi            = edi + 7*edx - nfpk_back_right
;       eax            = last dword stored
nf0_r:

; Store pixels pxA,pxB,pxC,pxD (in memory order) at [edi+dstOfs].
NF0R_PUT4 MACRO dstOfs, pxA, pxB, pxC, pxD
        mov     al, pxC
        mov     ah, pxD
        shl     eax, 16                 ; pxC/pxD become bytes 2 and 3
        mov     al, pxA
        mov     ah, pxB
        mov     [edi+dstOfs], eax
ENDM

        NF0R_PUT4 0, bl, bh, bl, bh     ; 0101  (1)
        NF0R_PUT4 4, bl, bh, bh, bh     ; 0111
        add     edi, edx

        NF0R_PUT4 0, cl, bl, cl, bl     ; 2020  (2)
        NF0R_PUT4 4, bh, bh, bh, bh     ; 1111
        add     edi, edx

        NF0R_PUT4 0, bl, bh, bl, cl     ; 0102  (3)
        NF0R_PUT4 4, bh, ch, bh, ch     ; 1313
        add     edi, edx

        NF0R_PUT4 0, cl, bl, cl, bh     ; 2021  (4)
        NF0R_PUT4 4, cl, bh, ch, bh     ; 2131
        add     edi, edx

        NF0R_PUT4 0, bl, cl, bh, cl     ; 0212  (5)
        NF0R_PUT4 4, bh, ch, cl, ch     ; 1323
        add     edi, edx

        NF0R_PUT4 0, cl, cl, cl, bh     ; 2221  (6)
        NF0R_PUT4 4, ch, bh, ch, bh     ; 3131
        add     edi, edx

        NF0R_PUT4 0, cl, cl, cl, ch     ; 2223  (7)
        NF0R_PUT4 4, cl, ch, cl, ch     ; 2323
        add     edi, edx

        NF0R_PUT4 0, cl, cl, ch, cl     ; 2232  (8)
        NF0R_PUT4 4, ch, bh, ch, ch     ; 3133

        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8
        retn
|
|
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf1: ; No change (and copied to screen)
; Nothing is written: edi is simply stepped SWIDTH bytes to the right,
; past this block.
|
|
add edi, SWIDTH
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf2:                                    ; Near shift from older part of current buffer
; One data byte indexes nfpk_ShiftP2, giving a packed displacement in ax:
; al = signed x offset, ah = row index into nfpk_ShiftY.
        movzx   eax, byte ptr [esi]
        inc     esi
        mov     ax, nfpk_ShiftP2[eax*2]

; Shared tail (also entered from nf3): turn the packed displacement in ax
; into a byte offset in eax and copy the block found there.
nf_xyc_shift:
        movzx   ebx, ah                 ; ebx = row index (read before eax is rewritten)
        movsx   eax, al                 ; eax = sign-extended x offset
        add     eax, nfpk_ShiftY[ebx*4] ; add the row displacement
        jmp     nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf3:                                    ; Near shift from newer part of current buffer
; Same table lookup as the "older part" case, but both displacement
; components are negated before the shared tail runs.
        movzx   eax, byte ptr [esi]
        inc     esi
        mov     ax, nfpk_ShiftP2[eax*2]
        neg     al                      ; negate x component
        neg     ah                      ; negate y component
        jmp     nf_xyc_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf4:                                    ; Near shift from previous buffer
; One data byte indexes nfpk_ShiftP1; the packed displacement it yields
; is handed to the previous-buffer shift tail.
        movzx   eax, byte ptr [esi]
        inc     esi
        mov     ax, nfpk_ShiftP1[eax*2]
        jmp     nf_xyp_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf5:                                    ; Far shift from previous buffer
; Two data bytes give the displacement directly:
; al = signed x offset, ah = row index into nfpk_ShiftY.
        mov     ax, [esi]
        add     esi, 2

; Shared tail (also entered from nf4): convert the packed displacement in
; ax to a byte offset, add DiffBufPtrs, and copy the block found there.
nf_xyp_shift:
        movzx   ebx, ah                 ; ebx = row index (read before eax is rewritten)
        movsx   eax, al                 ; eax = sign-extended x offset
        add     eax, nfpk_ShiftY[ebx*4] ; add the row displacement
        add     eax, DiffBufPtrs        ; same offset nf0 uses for "previous buffer"
        jmp     nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
|
|
nf6:                                    ; Run of no changes (must only appear in first nibble opcodes)
; The second nibble k (already in ebx) is a count, not an opcode:
; 2k+4 squares, i.e. k+2 pairs of squares, are skipped unchanged.
        add     esp, 4                  ; drop the pushed handler for the second nibble
        add     ebx, 2                  ; ebx = pairs to skip (minimum of 4 squares)

        ALIGN 4
nf6a:   add     edi, SWIDTH*2           ; step over two squares
        dec     ebx
        jz      nf6z                    ; that was the last pair
        dec     wcnt                    ; still inside the current row?
        jns     nf6a                    ; yes

        ; The run crosses a row boundary: restart the per-row pair counter
        ; and move to the next row of blocks.
        mov     eax, w
        shr     eax, 1
        dec     eax
        mov     wcnt, eax               ; wcnt = w/2 - 1
        add     edi, new_row
        dec     h                       ; row count (should never become zero here)
        jmp     nf6a

nf6z:   retn                            ; returns to the address pushed first (nf_NextPair)
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf_shift:
; Copy one 8x8 block from [edi+eax] to [edi].
; In:   eax = byte offset from destination to source
;       edi = destination block
; Out:  edi = edi + 7*nf_width - nfpk_back_right
;       esi preserved (parked in ebx); eax, ebx, edx overwritten

; Copy the 8 bytes of one row from [esi] to [edi].
NF_SHIFT_ROW MACRO
        mov     eax, [esi]
        mov     [edi], eax
        mov     eax, [esi+4]
        mov     [edi+4], eax
ENDM

    if 0 ;debug
        mov     eax, 0
        mov     ebx, eax
        jmp     nf_solid
    endif

        mov     ebx, esi                ; save esi
        lea     esi, [edi+eax]          ; esi = source block
        mov     edx, nf_width           ; row stride

    REPEAT SHEIGHT-1
        NF_SHIFT_ROW
        add     esi, edx
        add     edi, edx
    ENDM
        NF_SHIFT_ROW                    ; last row: no pointer advance

        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8
        mov     esi, ebx                ; restore esi
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf7:                                    ; 8x8x1 (10 bytes)
; Two-colour 8x8 block, one bit per pixel.
;   [esi+0],[esi+1]  the two colours (first > second selects nf23 instead)
;   [esi+2]..[esi+9] one pattern byte per row
;
; Self-modifying: each pattern byte indexes nfpk_mov8, and the four bytes
; fetched overwrite the byte at offset +2 of the four "mov ax, bx" slots
; of that row, so each slot ends up loading ax from bx, dx, cx or bp.
; Each slot supplies two pixels.  The code must therefore be writable.

; Patch the four slots lblA..lblD with the table entry selected by the
; pattern byte at [esi+srcOfs].  edx points at slot `anchor`, +2.
NF7_PATCH MACRO srcOfs, anchor, lblA, lblB, lblC, lblD
        mov     al, [esi+srcOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(lblA-anchor)], bl
        mov     [edx+(lblB-anchor)], bh
        shr     ebx, 16
        mov     [edx+(lblC-anchor)], bl
        mov     [edx+(lblD-anchor)], bh
ENDM

; Two patchable slots that assemble four pixels in eax.
NF7_PAIR MACRO lblA, lblB
lblA:   mov     ax, bx
        shl     eax, 16
lblB:   mov     ax, bx
ENDM

        mov     ax, [esi]
        cmp     al, ah
        ja      nf23

    if 0 ;debug
        add     esi, 10
        mov     eax, 0fefefefeH
        mov     ebx, eax
        jmp     nf_solid
    endif

        xor     eax, eax
        lea     ecx, nfpk_mov8
        lea     edx, byte ptr ds:nf7_11+2

        NF7_PATCH 2, nf7_11, nf7_11, nf7_12, nf7_13, nf7_14
        NF7_PATCH 3, nf7_11, nf7_21, nf7_22, nf7_23, nf7_24
        NF7_PATCH 4, nf7_11, nf7_31, nf7_32, nf7_33, nf7_34
        NF7_PATCH 5, nf7_11, nf7_41, nf7_42, nf7_43, nf7_44

        lea     edx, [edx+(nf7_51-nf7_11)]      ; re-anchor on row 5

        NF7_PATCH 6, nf7_51, nf7_51, nf7_52, nf7_53, nf7_54
        NF7_PATCH 7, nf7_51, nf7_61, nf7_62, nf7_63, nf7_64
        NF7_PATCH 8, nf7_51, nf7_71, nf7_72, nf7_73, nf7_74
        NF7_PATCH 9, nf7_51, nf7_81, nf7_82, nf7_83, nf7_84

        push    ebp
        push    esi
        ; load bx,dx,cx,bp with 00,01,10,11 color combinations
        ; (note that bits are read least significant first).
        mov     cx, [esi]
        mov     esi, nf_width           ; esi now serves as the row stride
        mov     bl, cl
        mov     bh, cl
        mov     dl, ch
        mov     dh, cl
        mov     al, ch
        mov     ah, ch
        mov     ebp, eax
        jmp     nf7_0                   ; flush prefetch
        ALIGN 4
nf7_0:
        NF7_PAIR nf7_11, nf7_12
        mov     [edi], eax
        NF7_PAIR nf7_13, nf7_14
        mov     [edi+4], eax
        add     edi, esi

        NF7_PAIR nf7_21, nf7_22
        mov     [edi], eax
        NF7_PAIR nf7_23, nf7_24
        mov     [edi+4], eax
        add     edi, esi

        NF7_PAIR nf7_31, nf7_32
        mov     [edi], eax
        NF7_PAIR nf7_33, nf7_34
        mov     [edi+4], eax
        add     edi, esi

        NF7_PAIR nf7_41, nf7_42
        mov     [edi], eax
        NF7_PAIR nf7_43, nf7_44
        mov     [edi+4], eax
        add     edi, esi

        NF7_PAIR nf7_51, nf7_52
        mov     [edi], eax
        NF7_PAIR nf7_53, nf7_54
        mov     [edi+4], eax
        add     edi, esi

        NF7_PAIR nf7_61, nf7_62
        mov     [edi], eax
        NF7_PAIR nf7_63, nf7_64
        mov     [edi+4], eax
        add     edi, esi

        NF7_PAIR nf7_71, nf7_72
        mov     [edi], eax
        NF7_PAIR nf7_73, nf7_74
        mov     [edi+4], eax
        add     edi, esi

        NF7_PAIR nf7_81, nf7_82
        mov     [edi], eax
        NF7_PAIR nf7_83, nf7_84
        mov     [edi+4], eax            ; last row: no stride add

        pop     esi
        pop     ebp
        add     esi, 10                 ; consume the 10 data bytes
        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf7+16
|
|
nf23:                                   ; low 4x4x1 (4 bytes)
; Two-colour block at half resolution: each pattern bit covers a 2x2
; pixel cell.
;   [esi+0],[esi+1]  the two colours
;   [esi+2],[esi+3]  pattern; each nibble drives one pair of rows
;
; Self-modifying: each nibble indexes nfpk_mov4l and the four bytes
; fetched overwrite the byte at offset +2 of four "mov ax, bx" slots.
; The dword built for a pair of rows is stored to both rows.

; Patch slots lblA..lblD from the table entry selected by eax.
; edx points at slot nf23_11, +2.
NF23_PATCH MACRO lblA, lblB, lblC, lblD
        mov     ebx, [ecx+eax*4]
        mov     [edx+(lblA-nf23_11)], bl
        mov     [edx+(lblB-nf23_11)], bh
        shr     ebx, 16
        mov     [edx+(lblC-nf23_11)], bl
        mov     [edx+(lblD-nf23_11)], bh
ENDM

; Two patchable slots that assemble four pixels in eax.
NF23_PAIR MACRO lblA, lblB
lblA:   mov     ax, bx
        shl     eax, 16
lblB:   mov     ax, bx
ENDM

        xor     eax, eax
        lea     ecx, nfpk_mov4l
        lea     edx, byte ptr ds:nf23_11+2

        mov     al, [esi+2]
        and     al, 0fH                 ; low nibble  -> rows 1-2
        NF23_PATCH nf23_11, nf23_12, nf23_13, nf23_14

        mov     al, [esi+2]
        shr     al, 4                   ; high nibble -> rows 3-4
        NF23_PATCH nf23_31, nf23_32, nf23_33, nf23_34

        mov     al, [esi+3]
        and     al, 0fH                 ; low nibble  -> rows 5-6
        NF23_PATCH nf23_51, nf23_52, nf23_53, nf23_54

        mov     al, [esi+3]
        shr     al, 4                   ; high nibble -> rows 7-8
        NF23_PATCH nf23_71, nf23_72, nf23_73, nf23_74

        mov     edx, nf_width           ; edx now serves as the row stride

        ; load bx,cx with 00,11 color combinations
        mov     bx, [esi]
        mov     cl, bh
        mov     bh, bl
        mov     ch, cl

        jmp     nf23_0                  ; flush prefetch
        ALIGN 4
nf23_0:
        NF23_PAIR nf23_11, nf23_12
        mov     [edi], eax
        mov     [edi+edx], eax
        NF23_PAIR nf23_13, nf23_14
        mov     [edi+4], eax
        mov     [edi+edx+4], eax
        lea     edi, [edi+edx*2]

        NF23_PAIR nf23_31, nf23_32
        mov     [edi], eax
        mov     [edi+edx], eax
        NF23_PAIR nf23_33, nf23_34
        mov     [edi+4], eax
        mov     [edi+edx+4], eax
        lea     edi, [edi+edx*2]

        NF23_PAIR nf23_51, nf23_52
        mov     [edi], eax
        mov     [edi+edx], eax
        NF23_PAIR nf23_53, nf23_54
        mov     [edi+4], eax
        mov     [edi+edx+4], eax
        lea     edi, [edi+edx*2]

        NF23_PAIR nf23_71, nf23_72
        mov     [edi], eax
        mov     [edi+edx], eax
        NF23_PAIR nf23_73, nf23_74
        mov     [edi+4], eax
        add     edi, edx                ; only one more row down: edi ends on row 8
        mov     [edi+4], eax

        sub     edi, nfpk_back_right
        add     esi, 4                  ; consume the 4 data bytes
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf8:                                    ; 2x2 4x4x1 (16 bytes)
; Four 4x4 quadrants, each with its own two colours and 16 pattern bits.
; Data layout, in the order the quadrants are drawn:
;   [esi+0]  colours, [esi+2..3]   pattern  -> rows 1-4, left half
;   [esi+4]  colours, [esi+6..7]   pattern  -> rows 5-8, left half
;   [esi+8]  colours, [esi+10..11] pattern  -> rows 1-4, right half
;   [esi+12] colours, [esi+14..15] pattern  -> rows 5-8, right half
; (first colour > second in the first pair selects nf24/nf40 instead.)
;
; Self-modifying: each pattern byte indexes nfpk_mov8 and the four bytes
; fetched overwrite the byte at offset +2 of four "mov ax, bx" slots
; (two slots per 4-pixel row, so one pattern byte covers two rows).

; Patch slots lblA..lblD with the table entry selected by the pattern
; byte at [esi+srcOfs].  edx points at slot `anchor`, +2.
NF8_PATCH MACRO srcOfs, anchor, lblA, lblB, lblC, lblD
        mov     al, [esi+srcOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(lblA-anchor)], bl
        mov     [edx+(lblB-anchor)], bh
        shr     ebx, 16
        mov     [edx+(lblC-anchor)], bl
        mov     [edx+(lblD-anchor)], bh
ENDM

; Two patchable slots building one 4-pixel row, stored at [edi].
NF8_ROW MACRO lblA, lblB
lblA:   mov     ax, bx
        shl     eax, 16
lblB:   mov     ax, bx
        mov     [edi], eax
ENDM

; Spread the colour pair in cx over bx, dx, cx, bp
; (00,01,10,11 color combinations; bits are read least significant first).
NF8_COLORS MACRO
        mov     bl, cl
        mov     bh, cl
        mov     dl, ch
        mov     dh, cl
        mov     al, ch
        mov     ah, ch
        mov     ebp, eax
ENDM

        mov     ax, [esi]
        cmp     al, ah
        ja      nf24

        xor     eax, eax
        lea     ecx, nfpk_mov8
        lea     edx, byte ptr ds:nf8_11+2

        NF8_PATCH 2,  nf8_11, nf8_11, nf8_12, nf8_13, nf8_14
        NF8_PATCH 3,  nf8_11, nf8_21, nf8_22, nf8_23, nf8_24
        NF8_PATCH 6,  nf8_11, nf8_31, nf8_32, nf8_33, nf8_34
        NF8_PATCH 7,  nf8_11, nf8_41, nf8_42, nf8_43, nf8_44

        add     edx, nf8_51-nf8_11      ; re-anchor on slot nf8_51

        NF8_PATCH 10, nf8_51, nf8_51, nf8_52, nf8_53, nf8_54
        NF8_PATCH 11, nf8_51, nf8_61, nf8_62, nf8_63, nf8_64
        NF8_PATCH 14, nf8_51, nf8_71, nf8_72, nf8_73, nf8_74
        NF8_PATCH 15, nf8_51, nf8_81, nf8_82, nf8_83, nf8_84

        push    ebp
        push    esi                     ; data pointer stays at [esp] for reloads
        mov     cx, [esi]
        mov     esi, nf_width           ; esi now serves as the row stride
        NF8_COLORS

        jmp     nf8_0                   ; flush prefetch
        ALIGN 4
nf8_0:
        ; left half, rows 1-4
        NF8_ROW nf8_11, nf8_12
        add     edi, esi
        NF8_ROW nf8_13, nf8_14
        add     edi, esi
        NF8_ROW nf8_21, nf8_22
        add     edi, esi
        NF8_ROW nf8_23, nf8_24
        add     edi, esi

        mov     eax, [esp]
        mov     cx, [eax+4]
        NF8_COLORS

        ; left half, rows 5-8
        NF8_ROW nf8_31, nf8_32
        add     edi, esi
        NF8_ROW nf8_33, nf8_34
        add     edi, esi
        NF8_ROW nf8_41, nf8_42
        add     edi, esi
        NF8_ROW nf8_43, nf8_44
        add     edi, esi

        lea     eax, [esi*8-4]          ; back up 8 rows, right 4 pixels
        sub     edi, eax

        mov     eax, [esp]
        mov     cx, [eax+8]
        NF8_COLORS

        ; right half, rows 1-4
        NF8_ROW nf8_51, nf8_52
        add     edi, esi
        NF8_ROW nf8_53, nf8_54
        add     edi, esi
        NF8_ROW nf8_61, nf8_62
        add     edi, esi
        NF8_ROW nf8_63, nf8_64
        add     edi, esi

        mov     eax, [esp]
        mov     cx, [eax+12]
        NF8_COLORS

        ; right half, rows 5-8
        NF8_ROW nf8_71, nf8_72
        add     edi, esi
        NF8_ROW nf8_73, nf8_74
        add     edi, esi
        NF8_ROW nf8_81, nf8_82
        add     edi, esi
        NF8_ROW nf8_83, nf8_84          ; last row: no stride add

        pop     esi
        pop     ebp
        add     esi, 16                 ; consume the 16 data bytes
        sub     edi, 4                  ; undo the right-half column offset
        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+16
|
|
nf24:                                   ; 2x1 4x8x1 (12 bytes)
; Two 4x8 halves side by side, each with its own two colours.
;   [esi+0] colours, [esi+2..5]  pattern -> left half,  rows 1-8
;   [esi+6] colours, [esi+8..11] pattern -> right half, rows 1-8
; (first colour > second in the pair at [esi+6] selects nf40 instead.)
;
; Self-modifying: each pattern byte indexes nfpk_mov8 and the four bytes
; fetched overwrite the byte at offset +2 of four "mov ax, bx" slots
; (two slots per 4-pixel row, so one pattern byte covers two rows).

; Patch slots lblA..lblD with the table entry selected by the pattern
; byte at [esi+srcOfs].  edx points at slot `anchor`, +2.
NF24_PATCH MACRO srcOfs, anchor, lblA, lblB, lblC, lblD
        mov     al, [esi+srcOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(lblA-anchor)], bl
        mov     [edx+(lblB-anchor)], bh
        shr     ebx, 16
        mov     [edx+(lblC-anchor)], bl
        mov     [edx+(lblD-anchor)], bh
ENDM

; Two patchable slots building one 4-pixel row, stored at [edi].
NF24_ROW MACRO lblA, lblB
lblA:   mov     ax, bx
        shl     eax, 16
lblB:   mov     ax, bx
        mov     [edi], eax
ENDM

; Spread the colour pair in cx over bx, dx, cx, bp
; (00,01,10,11 color combinations; bits are read least significant first).
NF24_COLORS MACRO
        mov     bl, cl
        mov     bh, cl
        mov     dl, ch
        mov     dh, cl
        mov     al, ch
        mov     ah, ch
        mov     ebp, eax
ENDM

        mov     ax, [esi+6]
        cmp     al, ah
        ja      nf40

        xor     eax, eax
        lea     ecx, nfpk_mov8
        lea     edx, byte ptr ds:nf24_11+2

        NF24_PATCH 2,  nf24_11, nf24_11, nf24_12, nf24_13, nf24_14
        NF24_PATCH 3,  nf24_11, nf24_21, nf24_22, nf24_23, nf24_24
        NF24_PATCH 4,  nf24_11, nf24_31, nf24_32, nf24_33, nf24_34
        NF24_PATCH 5,  nf24_11, nf24_41, nf24_42, nf24_43, nf24_44

        add     edx, nf24_51-nf24_11    ; re-anchor on slot nf24_51

        NF24_PATCH 8,  nf24_51, nf24_51, nf24_52, nf24_53, nf24_54
        NF24_PATCH 9,  nf24_51, nf24_61, nf24_62, nf24_63, nf24_64
        NF24_PATCH 10, nf24_51, nf24_71, nf24_72, nf24_73, nf24_74
        NF24_PATCH 11, nf24_51, nf24_81, nf24_82, nf24_83, nf24_84

        push    ebp
        push    esi                     ; data pointer stays at [esp] for the reload
        mov     cx, [esi]
        mov     esi, nf_width           ; esi now serves as the row stride
        NF24_COLORS

        jmp     nf24_0                  ; flush prefetch
        ALIGN 4
nf24_0:
        ; left half, rows 1-8
        NF24_ROW nf24_11, nf24_12
        add     edi, esi
        NF24_ROW nf24_13, nf24_14
        add     edi, esi
        NF24_ROW nf24_21, nf24_22
        add     edi, esi
        NF24_ROW nf24_23, nf24_24
        add     edi, esi
        NF24_ROW nf24_31, nf24_32
        add     edi, esi
        NF24_ROW nf24_33, nf24_34
        add     edi, esi
        NF24_ROW nf24_41, nf24_42
        add     edi, esi
        NF24_ROW nf24_43, nf24_44
        add     edi, esi

        lea     eax, [esi*8-4]          ; back up 8 rows, right 4 pixels
        sub     edi, eax

        mov     eax, [esp]
        mov     cx, [eax+6]
        NF24_COLORS

        ; right half, rows 1-8
        NF24_ROW nf24_51, nf24_52
        add     edi, esi
        NF24_ROW nf24_53, nf24_54
        add     edi, esi
        NF24_ROW nf24_61, nf24_62
        add     edi, esi
        NF24_ROW nf24_63, nf24_64
        add     edi, esi
        NF24_ROW nf24_71, nf24_72
        add     edi, esi
        NF24_ROW nf24_73, nf24_74
        add     edi, esi
        NF24_ROW nf24_81, nf24_82
        add     edi, esi
        NF24_ROW nf24_83, nf24_84       ; last row: no stride add

        pop     esi
        pop     ebp
        add     esi, 12                 ; consume the 12 data bytes
        sub     edi, 4                  ; undo the right-half column offset
        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+32
|
|
nf40:                                   ; 1x2 8x4x1 (12 bytes)
; Two 8x4 halves stacked vertically, each with its own two colours.
;   [esi+0] colours, [esi+2..5]  pattern (one byte per row) -> rows 1-4
;   [esi+6] colours, [esi+8..11] pattern (one byte per row) -> rows 5-8
;
; Self-modifying: each pattern byte indexes nfpk_mov8 and the four bytes
; fetched overwrite the byte at offset +2 of the four "mov ax, bx" slots
; of that row.  Each slot supplies two pixels.

; Patch slots lblA..lblD with the table entry selected by the pattern
; byte at [esi+srcOfs].  edx points at slot `anchor`, +2.
NF40_PATCH MACRO srcOfs, anchor, lblA, lblB, lblC, lblD
        mov     al, [esi+srcOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(lblA-anchor)], bl
        mov     [edx+(lblB-anchor)], bh
        shr     ebx, 16
        mov     [edx+(lblC-anchor)], bl
        mov     [edx+(lblD-anchor)], bh
ENDM

; Two patchable slots that assemble four pixels in eax.
NF40_PAIR MACRO lblA, lblB
lblA:   mov     ax, bx
        shl     eax, 16
lblB:   mov     ax, bx
ENDM

; Spread the colour pair in cx over bx, dx, cx, bp
; (00,01,10,11 color combinations; bits are read least significant first).
NF40_COLORS MACRO
        mov     bl, cl
        mov     bh, cl
        mov     dl, ch
        mov     dh, cl
        mov     al, ch
        mov     ah, ch
        mov     ebp, eax
ENDM

        xor     eax, eax
        lea     ecx, nfpk_mov8
        lea     edx, byte ptr ds:nf40_11+2

        NF40_PATCH 2,  nf40_11, nf40_11, nf40_12, nf40_13, nf40_14
        NF40_PATCH 3,  nf40_11, nf40_21, nf40_22, nf40_23, nf40_24
        NF40_PATCH 4,  nf40_11, nf40_31, nf40_32, nf40_33, nf40_34
        NF40_PATCH 5,  nf40_11, nf40_41, nf40_42, nf40_43, nf40_44

        add     edx, nf40_51-nf40_11    ; re-anchor on row 5

        NF40_PATCH 8,  nf40_51, nf40_51, nf40_52, nf40_53, nf40_54
        NF40_PATCH 9,  nf40_51, nf40_61, nf40_62, nf40_63, nf40_64
        NF40_PATCH 10, nf40_51, nf40_71, nf40_72, nf40_73, nf40_74
        NF40_PATCH 11, nf40_51, nf40_81, nf40_82, nf40_83, nf40_84

        push    ebp
        push    esi                     ; data pointer stays at [esp] for the reload
        mov     cx, [esi]
        mov     esi, nf_width           ; esi now serves as the row stride
        NF40_COLORS

        jmp     nf40_0                  ; flush prefetch
        ALIGN 4
nf40_0:
        ; top half
        NF40_PAIR nf40_11, nf40_12
        mov     [edi], eax
        NF40_PAIR nf40_13, nf40_14
        mov     [edi+4], eax
        add     edi, esi

        NF40_PAIR nf40_21, nf40_22
        mov     [edi], eax
        NF40_PAIR nf40_23, nf40_24
        mov     [edi+4], eax
        add     edi, esi

        NF40_PAIR nf40_31, nf40_32
        mov     [edi], eax
        NF40_PAIR nf40_33, nf40_34
        mov     [edi+4], eax
        add     edi, esi

        NF40_PAIR nf40_41, nf40_42
        mov     [edi], eax
        NF40_PAIR nf40_43, nf40_44
        mov     [edi+4], eax
        add     edi, esi

        mov     eax, [esp]
        mov     cx, [eax+6]
        NF40_COLORS

        ; bottom half
        NF40_PAIR nf40_51, nf40_52
        mov     [edi], eax
        NF40_PAIR nf40_53, nf40_54
        mov     [edi+4], eax
        add     edi, esi

        NF40_PAIR nf40_61, nf40_62
        mov     [edi], eax
        NF40_PAIR nf40_63, nf40_64
        mov     [edi+4], eax
        add     edi, esi

        NF40_PAIR nf40_71, nf40_72
        mov     [edi], eax
        NF40_PAIR nf40_73, nf40_74
        mov     [edi+4], eax
        add     edi, esi

        NF40_PAIR nf40_81, nf40_82
        mov     [edi], eax
        NF40_PAIR nf40_83, nf40_84
        mov     [edi+4], eax            ; last row: no stride add

        pop     esi
        pop     ebp
        add     esi, 12                 ; consume the 12 data bytes
        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf9:                                    ; 8x8x2 (20 bytes)
; Four-colour 8x8 block, two bits per pixel.
;   [esi+0..3]   the four colours (loaded into bl, bh, cl, ch)
;   [esi+4..19]  pattern, two bytes per row (four pixels per byte)
; The ordering of the first and second colour pairs selects the
; variants nf41 and nf25 instead.
;
; Self-modifying: each pattern byte indexes nfpk_mov4 and the four bytes
; fetched overwrite the byte at offset +1 of four "mov al/ah, bl" slots,
; so each slot ends up copying one of the four colour registers.

; Patch slots lblA..lblD with the table entry selected by the pattern
; byte at [esi+srcOfs].  edx points at slot `anchor`, +1.
NF9_PATCH MACRO srcOfs, anchor, lblA, lblB, lblC, lblD
        mov     al, [esi+srcOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(lblA-anchor)], bl
        mov     [edx+(lblB-anchor)], bh
        shr     ebx, 16
        mov     [edx+(lblC-anchor)], bl
        mov     [edx+(lblD-anchor)], bh
ENDM

; Four patchable slots that assemble four pixels in eax.
NF9_PIX4 MACRO lblA, lblB, lblC, lblD
lblA:   mov     al, bl
lblB:   mov     ah, bl
        shl     eax, 16
lblC:   mov     al, bl
lblD:   mov     ah, bl
ENDM

        mov     eax, [esi]
        cmp     al, ah
        ja      nf41                    ; nf41 continues with eax still loaded

        shr     eax, 16
        cmp     al, ah
        ja      nf25

        xor     eax, eax
        lea     ecx, nfpk_mov4
        lea     edx, byte ptr ds:nf9_11+1

        NF9_PATCH 4,  nf9_11, nf9_11, nf9_12, nf9_13, nf9_14
        NF9_PATCH 5,  nf9_11, nf9_15, nf9_16, nf9_17, nf9_18
        NF9_PATCH 6,  nf9_11, nf9_21, nf9_22, nf9_23, nf9_24
        NF9_PATCH 7,  nf9_11, nf9_25, nf9_26, nf9_27, nf9_28
        NF9_PATCH 8,  nf9_11, nf9_31, nf9_32, nf9_33, nf9_34
        NF9_PATCH 9,  nf9_11, nf9_35, nf9_36, nf9_37, nf9_38
        NF9_PATCH 10, nf9_11, nf9_41, nf9_42, nf9_43, nf9_44
        NF9_PATCH 11, nf9_11, nf9_45, nf9_46, nf9_47, nf9_48

        lea     edx, [edx+(nf9_51-nf9_11)]      ; re-anchor on row 5

        NF9_PATCH 12, nf9_51, nf9_51, nf9_52, nf9_53, nf9_54
        NF9_PATCH 13, nf9_51, nf9_55, nf9_56, nf9_57, nf9_58
        NF9_PATCH 14, nf9_51, nf9_61, nf9_62, nf9_63, nf9_64
        NF9_PATCH 15, nf9_51, nf9_65, nf9_66, nf9_67, nf9_68
        NF9_PATCH 16, nf9_51, nf9_71, nf9_72, nf9_73, nf9_74
        NF9_PATCH 17, nf9_51, nf9_75, nf9_76, nf9_77, nf9_78
        NF9_PATCH 18, nf9_51, nf9_81, nf9_82, nf9_83, nf9_84
        NF9_PATCH 19, nf9_51, nf9_85, nf9_86, nf9_87, nf9_88

        ; Load bl,bh,cl,ch with four colors
        mov     bx, [esi]
        mov     cx, [esi+2]

        mov     edx, nf_width           ; edx now serves as the row stride
        jmp     nf9_0                   ; flush prefetch
        ALIGN 4
nf9_0:
        NF9_PIX4 nf9_11, nf9_12, nf9_13, nf9_14
        mov     [edi], eax
        NF9_PIX4 nf9_15, nf9_16, nf9_17, nf9_18
        mov     [edi+4], eax
        add     edi, edx

        NF9_PIX4 nf9_21, nf9_22, nf9_23, nf9_24
        mov     [edi], eax
        NF9_PIX4 nf9_25, nf9_26, nf9_27, nf9_28
        mov     [edi+4], eax
        add     edi, edx

        NF9_PIX4 nf9_31, nf9_32, nf9_33, nf9_34
        mov     [edi], eax
        NF9_PIX4 nf9_35, nf9_36, nf9_37, nf9_38
        mov     [edi+4], eax
        add     edi, edx

        NF9_PIX4 nf9_41, nf9_42, nf9_43, nf9_44
        mov     [edi], eax
        NF9_PIX4 nf9_45, nf9_46, nf9_47, nf9_48
        mov     [edi+4], eax
        add     edi, edx

        NF9_PIX4 nf9_51, nf9_52, nf9_53, nf9_54
        mov     [edi], eax
        NF9_PIX4 nf9_55, nf9_56, nf9_57, nf9_58
        mov     [edi+4], eax
        add     edi, edx

        NF9_PIX4 nf9_61, nf9_62, nf9_63, nf9_64
        mov     [edi], eax
        NF9_PIX4 nf9_65, nf9_66, nf9_67, nf9_68
        mov     [edi+4], eax
        add     edi, edx

        NF9_PIX4 nf9_71, nf9_72, nf9_73, nf9_74
        mov     [edi], eax
        NF9_PIX4 nf9_75, nf9_76, nf9_77, nf9_78
        mov     [edi+4], eax
        add     edi, edx

        NF9_PIX4 nf9_81, nf9_82, nf9_83, nf9_84
        mov     [edi], eax
        NF9_PIX4 nf9_85, nf9_86, nf9_87, nf9_88
        mov     [edi+4], eax            ; last row: no stride add

        add     esi, 20                 ; consume the 20 data bytes
        sub     edi, nfpk_back_right
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+16
|
|
nf25:                                   ; low 4x4x2 (8 bytes)
; Four-colour block at half resolution: each 2-bit pattern field covers
; a 2x2 pixel cell.
;   [esi+0..3]  the four colours (loaded into bl, bh, cl, ch)
;   [esi+4..7]  pattern, one byte per pair of rows
;
; Self-modifying: each pattern byte indexes nfpk_mov4 and the four bytes
; fetched overwrite the byte at offset +1 of four "mov al/ah, bl" slots.
; Note the slots are patched in reverse label order (x4, x3, x2, x1).
; Each slot's colour is duplicated horizontally, and the finished dword
; is stored to two consecutive rows.

; Patch slots lblA..lblD with the table entry selected by the pattern
; byte at [esi+srcOfs].  edx points at slot nf25_11, +1.
NF25_PATCH MACRO srcOfs, lblA, lblB, lblC, lblD
        mov     al, [esi+srcOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(lblA-nf25_11)], bl
        mov     [edx+(lblB-nf25_11)], bh
        shr     ebx, 16
        mov     [edx+(lblC-nf25_11)], bl
        mov     [edx+(lblD-nf25_11)], bh
ENDM

; Two patchable slots, each doubled, assembling four pixels in eax.
NF25_PIX4 MACRO lblA, lblB
lblA:   mov     ah, bl
        mov     al, ah
        shl     eax, 16
lblB:   mov     al, bl
        mov     ah, al
ENDM

    if 0 ;debug
        mov     eax, 0
        mov     ebx, 0
        add     esi, 8
        jmp     nf_solid
    endif

        xor     eax, eax
        lea     ecx, nfpk_mov4
        lea     edx, byte ptr ds:nf25_11+1

        NF25_PATCH 4, nf25_14, nf25_13, nf25_12, nf25_11
        NF25_PATCH 5, nf25_24, nf25_23, nf25_22, nf25_21
        NF25_PATCH 6, nf25_34, nf25_33, nf25_32, nf25_31
        NF25_PATCH 7, nf25_44, nf25_43, nf25_42, nf25_41

        ; Load bl,bh,cl,ch with four colors
        mov     bx, [esi]
        mov     cx, [esi+2]

        mov     edx, nf_width           ; edx now serves as the row stride
        jmp     nf25_0                  ; flush prefetch
        ALIGN 4
nf25_0:
        NF25_PIX4 nf25_11, nf25_12
        mov     [edi], eax
        mov     [edi+edx], eax
        NF25_PIX4 nf25_13, nf25_14
        mov     [edi+4], eax
        mov     [edi+edx+4], eax
        lea     edi, [edi+edx*2]

        NF25_PIX4 nf25_21, nf25_22
        mov     [edi], eax
        mov     [edi+edx], eax
        NF25_PIX4 nf25_23, nf25_24
        mov     [edi+4], eax
        mov     [edi+edx+4], eax
        lea     edi, [edi+edx*2]

        NF25_PIX4 nf25_31, nf25_32
        mov     [edi], eax
        mov     [edi+edx], eax
        NF25_PIX4 nf25_33, nf25_34
        mov     [edi+4], eax
        mov     [edi+edx+4], eax
        lea     edi, [edi+edx*2]

        NF25_PIX4 nf25_41, nf25_42
        mov     [edi], eax
        mov     [edi+edx], eax
        NF25_PIX4 nf25_43, nf25_44
        mov     [edi+4], eax
        mov     [edi+edx+4], eax
        add     edi, edx                ; only one more row down: edi ends on row 8

        add     esi, 8                  ; consume the 8 data bytes
        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+32
|
|
; nf41 -- "low 4x8x2" block, consumes 12 bytes at [esi]:
;   [esi+0..3]   four colour bytes (loaded into bl,bh,cl,ch before nf41_0)
;   [esi+4..11]  eight pattern bytes, one per output row
; Each of the 8 output rows is built from 4 colour selections, every one
; written to two adjacent bytes (al/ah are made equal before each store).
; The pattern bytes are not decoded with tests or branches.  Each one indexes
; the dword table nfpk_mov4, and the four bytes of that entry are stored over
; the second byte of the four labelled "mov al|ah, bl" instructions of the
; matching row (self-modifying code; the "jmp nf41_0 ; flush prefetch" below
; is there so the patched bytes are the ones executed).
; NOTE(review): nfpk_mov4 is defined outside this chunk -- presumably its
; bytes are ModRM values that pick bl/bh/cl/ch as the source register; confirm.
nf41: ; low 4x8x2 (12 bytes)
|
|
; eax still holds whatever the code that jumped here loaded (not visible in
; this chunk); after the shift al/ah are bytes 2 and 3 of that value.
shr eax, 16
|
|
cmp al, ah
|
|
; unsigned byte2 > byte3 selects the 8x4 variant instead
ja nf57
|
|
|
|
; clear eax so "mov al, [esi+n]" below yields a clean table index
xor eax, eax
|
|
; ecx = base of the patch table
lea ecx, nfpk_mov4
|
|
; edx = address of the 2nd byte of the instruction at nf41_11; every patch
; store below is written as [edx+(label-nf41_11)]
lea edx, byte ptr ds:nf41_11+1
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_14-nf41_11)], bl
|
|
mov [edx+(nf41_13-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_12-nf41_11)], bl
|
|
mov [edx+(nf41_11-nf41_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_24-nf41_11)], bl
|
|
mov [edx+(nf41_23-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_22-nf41_11)], bl
|
|
mov [edx+(nf41_21-nf41_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_34-nf41_11)], bl
|
|
mov [edx+(nf41_33-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_32-nf41_11)], bl
|
|
mov [edx+(nf41_31-nf41_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_44-nf41_11)], bl
|
|
mov [edx+(nf41_43-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_42-nf41_11)], bl
|
|
mov [edx+(nf41_41-nf41_11)], bh
|
|
|
|
lea edx, [edx+(nf41_51-nf41_11)]
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_54-nf41_51)], bl
|
|
mov [edx+(nf41_53-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_52-nf41_51)], bl
|
|
mov [edx+(nf41_51-nf41_51)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_64-nf41_51)], bl
|
|
mov [edx+(nf41_63-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_62-nf41_51)], bl
|
|
mov [edx+(nf41_61-nf41_51)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_74-nf41_51)], bl
|
|
mov [edx+(nf41_73-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_72-nf41_51)], bl
|
|
mov [edx+(nf41_71-nf41_51)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_84-nf41_51)], bl
|
|
mov [edx+(nf41_83-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_82-nf41_51)], bl
|
|
mov [edx+(nf41_81-nf41_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width
|
|
jmp nf41_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf41_0:
|
|
nf41_11:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_12:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_13:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_14:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_21:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_22:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_23:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_24:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_31:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_32:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_33:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_34:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_41:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_42:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_43:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_44:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_51:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_52:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_53:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_54:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_61:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_62:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_63:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_64:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_71:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_72:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_73:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_74:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_81:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_82:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_83:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_84:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
|
|
add esi, 12
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+48
|
|
; nf57 -- "low 8x4x2" block, consumes 12 bytes at [esi]:
;   [esi+0..3]   four colour bytes (loaded into bl,bh,cl,ch before nf57_0)
;   [esi+4..11]  eight pattern bytes, two per decoded row
; Four rows of 8 pixels are decoded; each decoded row is stored twice, at
; [edi] and [edi+edx], so the block still covers 8 lines.
; Reached from nf41 when its colour-order test takes the "ja nf57" branch.
; Like the neighbouring handlers this is self-modifying: the bytes fetched
; from nfpk_mov4 overwrite the second byte of the labelled "mov al|ah, bl"
; instructions nf57_11..nf57_48 before they are executed.
; NOTE(review): nfpk_mov4 is defined outside this chunk -- presumably its
; bytes are ModRM values that pick bl/bh/cl/ch as the source register; confirm.
nf57: ; low 8x4x2 (12 bytes)
|
|
; clear eax so "mov al, [esi+n]" below yields a clean table index
xor eax, eax
|
|
; ecx = base of the patch table
lea ecx, nfpk_mov4
|
|
; edx = address of the 2nd byte of the instruction at nf57_11; every patch
; store below is written as [edx+(label-nf57_11)]
lea edx, byte ptr ds:nf57_11+1
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_11-nf57_11)], bl
|
|
mov [edx+(nf57_12-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_13-nf57_11)], bl
|
|
mov [edx+(nf57_14-nf57_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_15-nf57_11)], bl
|
|
mov [edx+(nf57_16-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_17-nf57_11)], bl
|
|
mov [edx+(nf57_18-nf57_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_21-nf57_11)], bl
|
|
mov [edx+(nf57_22-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_23-nf57_11)], bl
|
|
mov [edx+(nf57_24-nf57_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_25-nf57_11)], bl
|
|
mov [edx+(nf57_26-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_27-nf57_11)], bl
|
|
mov [edx+(nf57_28-nf57_11)], bh
|
|
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_31-nf57_11)], bl
|
|
mov [edx+(nf57_32-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_33-nf57_11)], bl
|
|
mov [edx+(nf57_34-nf57_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_35-nf57_11)], bl
|
|
mov [edx+(nf57_36-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_37-nf57_11)], bl
|
|
mov [edx+(nf57_38-nf57_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_41-nf57_11)], bl
|
|
mov [edx+(nf57_42-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_43-nf57_11)], bl
|
|
mov [edx+(nf57_44-nf57_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_45-nf57_11)], bl
|
|
mov [edx+(nf57_46-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_47-nf57_11)], bl
|
|
mov [edx+(nf57_48-nf57_11)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width
|
|
jmp nf57_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf57_0:
|
|
nf57_11:mov al, bl
|
|
nf57_12:mov ah, bl
|
|
shl eax, 16
|
|
nf57_13:mov al, bl
|
|
nf57_14:mov ah, bl
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
nf57_15:mov al, bl
|
|
nf57_16:mov ah, bl
|
|
shl eax, 16
|
|
nf57_17:mov al, bl
|
|
nf57_18:mov ah, bl
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf57_21:mov al, bl
|
|
nf57_22:mov ah, bl
|
|
shl eax, 16
|
|
nf57_23:mov al, bl
|
|
nf57_24:mov ah, bl
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
nf57_25:mov al, bl
|
|
nf57_26:mov ah, bl
|
|
shl eax, 16
|
|
nf57_27:mov al, bl
|
|
nf57_28:mov ah, bl
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf57_31:mov al, bl
|
|
nf57_32:mov ah, bl
|
|
shl eax, 16
|
|
nf57_33:mov al, bl
|
|
nf57_34:mov ah, bl
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
nf57_35:mov al, bl
|
|
nf57_36:mov ah, bl
|
|
shl eax, 16
|
|
nf57_37:mov al, bl
|
|
nf57_38:mov ah, bl
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf57_41:mov al, bl
|
|
nf57_42:mov ah, bl
|
|
shl eax, 16
|
|
nf57_43:mov al, bl
|
|
nf57_44:mov ah, bl
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
nf57_45:mov al, bl
|
|
nf57_46:mov ah, bl
|
|
shl eax, 16
|
|
nf57_47:mov al, bl
|
|
nf57_48:mov ah, bl
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
add edi, edx
|
|
|
|
add esi, 12
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf10 -- "2x2 4x4x2" block, consumes 32 bytes at [esi], four 8-byte groups:
;   [esi+ 0..3]  colours,  [esi+ 4..7]   patterns  -> rows 0-3, left  4 columns
;   [esi+ 8..11] colours,  [esi+12..15]  patterns  -> rows 4-7, left  4 columns
;   [esi+16..19] colours,  [esi+20..23]  patterns  -> rows 0-3, right 4 columns
;   [esi+24..27] colours,  [esi+28..31]  patterns  -> rows 4-7, right 4 columns
; One pattern byte drives one 4-pixel row (a single dword store).  After the
; two left groups the code steps edi back by edx*8-4 to start the right half.
; The first two colour bytes double as a sub-format selector (see below).
; Self-modifying: bytes fetched from nfpk_mov4 overwrite the second byte of
; the labelled "mov al|ah, bl" instructions nf10_11..nf10_88 before they run.
; NOTE(review): nfpk_mov4 is defined outside this chunk -- presumably its
; bytes are ModRM values that pick bl/bh/cl/ch as the source register; confirm.
nf10: ; 2x2 4x4x2 (32 bytes)
|
|
|
|
; al/ah = first two colour bytes of the stream
mov ax, [esi]
|
|
cmp al, ah
|
|
; unsigned first > second means the data is one of the 24-byte variants
ja nf26
|
|
|
|
; clear eax so "mov al, [esi+n]" below yields a clean table index
xor eax, eax
|
|
; ecx = base of the patch table
lea ecx, nfpk_mov4
|
|
; edx = address of the 2nd byte of the instruction at nf10_11; patch stores
; are written as [edx+(label-nf10_11)] (re-based to nf10_51 half way through)
lea edx, byte ptr ds:nf10_11+1
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_11-nf10_11)], bl
|
|
mov [edx+(nf10_12-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_13-nf10_11)], bl
|
|
mov [edx+(nf10_14-nf10_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_15-nf10_11)], bl
|
|
mov [edx+(nf10_16-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_17-nf10_11)], bl
|
|
mov [edx+(nf10_18-nf10_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_21-nf10_11)], bl
|
|
mov [edx+(nf10_22-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_23-nf10_11)], bl
|
|
mov [edx+(nf10_24-nf10_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_25-nf10_11)], bl
|
|
mov [edx+(nf10_26-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_27-nf10_11)], bl
|
|
mov [edx+(nf10_28-nf10_11)], bh
|
|
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_31-nf10_11)], bl
|
|
mov [edx+(nf10_32-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_33-nf10_11)], bl
|
|
mov [edx+(nf10_34-nf10_11)], bh
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_35-nf10_11)], bl
|
|
mov [edx+(nf10_36-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_37-nf10_11)], bl
|
|
mov [edx+(nf10_38-nf10_11)], bh
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_41-nf10_11)], bl
|
|
mov [edx+(nf10_42-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_43-nf10_11)], bl
|
|
mov [edx+(nf10_44-nf10_11)], bh
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_45-nf10_11)], bl
|
|
mov [edx+(nf10_46-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_47-nf10_11)], bl
|
|
mov [edx+(nf10_48-nf10_11)], bh
|
|
|
|
|
|
lea edx, [edx+(nf10_51-nf10_11)]
|
|
|
|
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_51-nf10_51)], bl
|
|
mov [edx+(nf10_52-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_53-nf10_51)], bl
|
|
mov [edx+(nf10_54-nf10_51)], bh
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_55-nf10_51)], bl
|
|
mov [edx+(nf10_56-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_57-nf10_51)], bl
|
|
mov [edx+(nf10_58-nf10_51)], bh
|
|
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_61-nf10_51)], bl
|
|
mov [edx+(nf10_62-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_63-nf10_51)], bl
|
|
mov [edx+(nf10_64-nf10_51)], bh
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_65-nf10_51)], bl
|
|
mov [edx+(nf10_66-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_67-nf10_51)], bl
|
|
mov [edx+(nf10_68-nf10_51)], bh
|
|
|
|
|
|
mov al, [esi+28]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_71-nf10_51)], bl
|
|
mov [edx+(nf10_72-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_73-nf10_51)], bl
|
|
mov [edx+(nf10_74-nf10_51)], bh
|
|
|
|
mov al, [esi+29]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_75-nf10_51)], bl
|
|
mov [edx+(nf10_76-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_77-nf10_51)], bl
|
|
mov [edx+(nf10_78-nf10_51)], bh
|
|
|
|
|
|
mov al, [esi+30]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_81-nf10_51)], bl
|
|
mov [edx+(nf10_82-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_83-nf10_51)], bl
|
|
mov [edx+(nf10_84-nf10_51)], bh
|
|
|
|
mov al, [esi+31]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_85-nf10_51)], bl
|
|
mov [edx+(nf10_86-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_87-nf10_51)], bl
|
|
mov [edx+(nf10_88-nf10_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width
|
|
jmp nf10_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf10_0:
|
|
nf10_11:mov al, bl
|
|
nf10_12:mov ah, bl
|
|
shl eax, 16
|
|
nf10_13:mov al, bl
|
|
nf10_14:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_15:mov al, bl
|
|
nf10_16:mov ah, bl
|
|
shl eax, 16
|
|
nf10_17:mov al, bl
|
|
nf10_18:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_21:mov al, bl
|
|
nf10_22:mov ah, bl
|
|
shl eax, 16
|
|
nf10_23:mov al, bl
|
|
nf10_24:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_25:mov al, bl
|
|
nf10_26:mov ah, bl
|
|
shl eax, 16
|
|
nf10_27:mov al, bl
|
|
nf10_28:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+8]
|
|
mov cx, [esi+10]
|
|
|
|
nf10_31:mov al, bl
|
|
nf10_32:mov ah, bl
|
|
shl eax, 16
|
|
nf10_33:mov al, bl
|
|
nf10_34:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_35:mov al, bl
|
|
nf10_36:mov ah, bl
|
|
shl eax, 16
|
|
nf10_37:mov al, bl
|
|
nf10_38:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_41:mov al, bl
|
|
nf10_42:mov ah, bl
|
|
shl eax, 16
|
|
nf10_43:mov al, bl
|
|
nf10_44:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_45:mov al, bl
|
|
nf10_46:mov ah, bl
|
|
shl eax, 16
|
|
nf10_47:mov al, bl
|
|
nf10_48:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
lea eax, [edx*8-4]
|
|
sub edi, eax
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+16]
|
|
mov cx, [esi+18]
|
|
|
|
nf10_51:mov al, bl
|
|
nf10_52:mov ah, bl
|
|
shl eax, 16
|
|
nf10_53:mov al, bl
|
|
nf10_54:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_55:mov al, bl
|
|
nf10_56:mov ah, bl
|
|
shl eax, 16
|
|
nf10_57:mov al, bl
|
|
nf10_58:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_61:mov al, bl
|
|
nf10_62:mov ah, bl
|
|
shl eax, 16
|
|
nf10_63:mov al, bl
|
|
nf10_64:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_65:mov al, bl
|
|
nf10_66:mov ah, bl
|
|
shl eax, 16
|
|
nf10_67:mov al, bl
|
|
nf10_68:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+24]
|
|
mov cx, [esi+26]
|
|
|
|
nf10_71:mov al, bl
|
|
nf10_72:mov ah, bl
|
|
shl eax, 16
|
|
nf10_73:mov al, bl
|
|
nf10_74:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_75:mov al, bl
|
|
nf10_76:mov ah, bl
|
|
shl eax, 16
|
|
nf10_77:mov al, bl
|
|
nf10_78:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_81:mov al, bl
|
|
nf10_82:mov ah, bl
|
|
shl eax, 16
|
|
nf10_83:mov al, bl
|
|
nf10_84:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_85:mov al, bl
|
|
nf10_86:mov ah, bl
|
|
shl eax, 16
|
|
nf10_87:mov al, bl
|
|
nf10_88:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
add esi, 32
|
|
sub edi, 4
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+16
|
|
; nf26 -- "2x1 4x8x2" block, consumes 24 bytes at [esi], two 12-byte groups:
;   [esi+ 0..3]  colours,  [esi+ 4..11]  patterns  -> rows 0-7, left  4 columns
;   [esi+12..15] colours,  [esi+16..23]  patterns  -> rows 0-7, right 4 columns
; One pattern byte drives one 4-pixel row (a single dword store).  After the
; left half the code steps edi back by edx*8-4 to start the right half.
; Reached from nf10; bytes 12/13 of the stream select between this layout
; and nf42.
; Self-modifying: bytes fetched from nfpk_mov4 overwrite the second byte of
; the labelled "mov al|ah, bl" instructions nf26_11..nf26_88 before they run.
; NOTE(review): nfpk_mov4 is defined outside this chunk -- presumably its
; bytes are ModRM values that pick bl/bh/cl/ch as the source register; confirm.
nf26: ; 2x1 4x8x2 (24 bytes)
|
|
|
|
; al/ah = first two colour bytes of the second group
mov ax, [esi+12]
|
|
cmp al, ah
|
|
; unsigned first > second selects the 8x4 (top/bottom) split instead
ja nf42
|
|
|
|
; Disabled debugging aid: would skip the 24 bytes and paint the block with
; zeros through nf_solid.
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 24
|
|
jmp nf_solid
|
|
endif
|
|
|
|
; clear eax so "mov al, [esi+n]" below yields a clean table index
xor eax, eax
|
|
; ecx = base of the patch table
lea ecx, nfpk_mov4
|
|
; edx = address of the 2nd byte of the instruction at nf26_11; patch stores
; are written as [edx+(label-nf26_11)] (re-based to nf26_51 half way through)
lea edx, byte ptr ds:nf26_11+1
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_11-nf26_11)], bl
|
|
mov [edx+(nf26_12-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_13-nf26_11)], bl
|
|
mov [edx+(nf26_14-nf26_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_15-nf26_11)], bl
|
|
mov [edx+(nf26_16-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_17-nf26_11)], bl
|
|
mov [edx+(nf26_18-nf26_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_21-nf26_11)], bl
|
|
mov [edx+(nf26_22-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_23-nf26_11)], bl
|
|
mov [edx+(nf26_24-nf26_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_25-nf26_11)], bl
|
|
mov [edx+(nf26_26-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_27-nf26_11)], bl
|
|
mov [edx+(nf26_28-nf26_11)], bh
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_31-nf26_11)], bl
|
|
mov [edx+(nf26_32-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_33-nf26_11)], bl
|
|
mov [edx+(nf26_34-nf26_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_35-nf26_11)], bl
|
|
mov [edx+(nf26_36-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_37-nf26_11)], bl
|
|
mov [edx+(nf26_38-nf26_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_41-nf26_11)], bl
|
|
mov [edx+(nf26_42-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_43-nf26_11)], bl
|
|
mov [edx+(nf26_44-nf26_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_45-nf26_11)], bl
|
|
mov [edx+(nf26_46-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_47-nf26_11)], bl
|
|
mov [edx+(nf26_48-nf26_11)], bh
|
|
|
|
|
|
lea edx, [edx+(nf26_51-nf26_11)]
|
|
|
|
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_51-nf26_51)], bl
|
|
mov [edx+(nf26_52-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_53-nf26_51)], bl
|
|
mov [edx+(nf26_54-nf26_51)], bh
|
|
|
|
mov al, [esi+17]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_55-nf26_51)], bl
|
|
mov [edx+(nf26_56-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_57-nf26_51)], bl
|
|
mov [edx+(nf26_58-nf26_51)], bh
|
|
|
|
|
|
mov al, [esi+18]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_61-nf26_51)], bl
|
|
mov [edx+(nf26_62-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_63-nf26_51)], bl
|
|
mov [edx+(nf26_64-nf26_51)], bh
|
|
|
|
mov al, [esi+19]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_65-nf26_51)], bl
|
|
mov [edx+(nf26_66-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_67-nf26_51)], bl
|
|
mov [edx+(nf26_68-nf26_51)], bh
|
|
|
|
|
|
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_71-nf26_51)], bl
|
|
mov [edx+(nf26_72-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_73-nf26_51)], bl
|
|
mov [edx+(nf26_74-nf26_51)], bh
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_75-nf26_51)], bl
|
|
mov [edx+(nf26_76-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_77-nf26_51)], bl
|
|
mov [edx+(nf26_78-nf26_51)], bh
|
|
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_81-nf26_51)], bl
|
|
mov [edx+(nf26_82-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_83-nf26_51)], bl
|
|
mov [edx+(nf26_84-nf26_51)], bh
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_85-nf26_51)], bl
|
|
mov [edx+(nf26_86-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_87-nf26_51)], bl
|
|
mov [edx+(nf26_88-nf26_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width
|
|
jmp nf26_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf26_0:
|
|
nf26_11:mov al, bl
|
|
nf26_12:mov ah, bl
|
|
shl eax, 16
|
|
nf26_13:mov al, bl
|
|
nf26_14:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_15:mov al, bl
|
|
nf26_16:mov ah, bl
|
|
shl eax, 16
|
|
nf26_17:mov al, bl
|
|
nf26_18:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_21:mov al, bl
|
|
nf26_22:mov ah, bl
|
|
shl eax, 16
|
|
nf26_23:mov al, bl
|
|
nf26_24:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_25:mov al, bl
|
|
nf26_26:mov ah, bl
|
|
shl eax, 16
|
|
nf26_27:mov al, bl
|
|
nf26_28:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_31:mov al, bl
|
|
nf26_32:mov ah, bl
|
|
shl eax, 16
|
|
nf26_33:mov al, bl
|
|
nf26_34:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_35:mov al, bl
|
|
nf26_36:mov ah, bl
|
|
shl eax, 16
|
|
nf26_37:mov al, bl
|
|
nf26_38:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_41:mov al, bl
|
|
nf26_42:mov ah, bl
|
|
shl eax, 16
|
|
nf26_43:mov al, bl
|
|
nf26_44:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_45:mov al, bl
|
|
nf26_46:mov ah, bl
|
|
shl eax, 16
|
|
nf26_47:mov al, bl
|
|
nf26_48:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
lea eax, [edx*8-4]
|
|
sub edi, eax
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+12]
|
|
mov cx, [esi+14]
|
|
|
|
nf26_51:mov al, bl
|
|
nf26_52:mov ah, bl
|
|
shl eax, 16
|
|
nf26_53:mov al, bl
|
|
nf26_54:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_55:mov al, bl
|
|
nf26_56:mov ah, bl
|
|
shl eax, 16
|
|
nf26_57:mov al, bl
|
|
nf26_58:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_61:mov al, bl
|
|
nf26_62:mov ah, bl
|
|
shl eax, 16
|
|
nf26_63:mov al, bl
|
|
nf26_64:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_65:mov al, bl
|
|
nf26_66:mov ah, bl
|
|
shl eax, 16
|
|
nf26_67:mov al, bl
|
|
nf26_68:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_71:mov al, bl
|
|
nf26_72:mov ah, bl
|
|
shl eax, 16
|
|
nf26_73:mov al, bl
|
|
nf26_74:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_75:mov al, bl
|
|
nf26_76:mov ah, bl
|
|
shl eax, 16
|
|
nf26_77:mov al, bl
|
|
nf26_78:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_81:mov al, bl
|
|
nf26_82:mov ah, bl
|
|
shl eax, 16
|
|
nf26_83:mov al, bl
|
|
nf26_84:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_85:mov al, bl
|
|
nf26_86:mov ah, bl
|
|
shl eax, 16
|
|
nf26_87:mov al, bl
|
|
nf26_88:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
add esi, 24
|
|
sub edi, 4
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+32
|
|
; nf42 -- "1x2 8x4x2" block, consumes 24 bytes at [esi], two 12-byte groups:
;   [esi+ 0..3]  colours,  [esi+ 4..11]  patterns  -> rows 0-3, all 8 columns
;   [esi+12..15] colours,  [esi+16..23]  patterns  -> rows 4-7, all 8 columns
; Two pattern bytes drive one 8-pixel row (dword stores at [edi] and [edi+4]).
; Reached from nf26 when its colour-order test takes the "ja nf42" branch.
; Self-modifying: bytes fetched from nfpk_mov4 overwrite the second byte of
; the labelled "mov al|ah, bl" instructions nf42_11..nf42_88 before they run.
; NOTE(review): nfpk_mov4 is defined outside this chunk -- presumably its
; bytes are ModRM values that pick bl/bh/cl/ch as the source register; confirm.
nf42: ; 1x2 8x4x2 (24 bytes)
|
|
|
|
; Disabled debugging aid: would skip the 24 bytes and paint the block with
; zeros through nf_solid.
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 24
|
|
jmp nf_solid
|
|
endif
|
|
; clear eax so "mov al, [esi+n]" below yields a clean table index
xor eax, eax
|
|
; ecx = base of the patch table
lea ecx, nfpk_mov4
|
|
; edx = address of the 2nd byte of the instruction at nf42_11; patch stores
; are written as [edx+(label-nf42_11)] (re-based to nf42_51 half way through)
lea edx, byte ptr ds:nf42_11+1
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_11-nf42_11)], bl
|
|
mov [edx+(nf42_12-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_13-nf42_11)], bl
|
|
mov [edx+(nf42_14-nf42_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_15-nf42_11)], bl
|
|
mov [edx+(nf42_16-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_17-nf42_11)], bl
|
|
mov [edx+(nf42_18-nf42_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_21-nf42_11)], bl
|
|
mov [edx+(nf42_22-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_23-nf42_11)], bl
|
|
mov [edx+(nf42_24-nf42_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_25-nf42_11)], bl
|
|
mov [edx+(nf42_26-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_27-nf42_11)], bl
|
|
mov [edx+(nf42_28-nf42_11)], bh
|
|
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_31-nf42_11)], bl
|
|
mov [edx+(nf42_32-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_33-nf42_11)], bl
|
|
mov [edx+(nf42_34-nf42_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_35-nf42_11)], bl
|
|
mov [edx+(nf42_36-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_37-nf42_11)], bl
|
|
mov [edx+(nf42_38-nf42_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_41-nf42_11)], bl
|
|
mov [edx+(nf42_42-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_43-nf42_11)], bl
|
|
mov [edx+(nf42_44-nf42_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_45-nf42_11)], bl
|
|
mov [edx+(nf42_46-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_47-nf42_11)], bl
|
|
mov [edx+(nf42_48-nf42_11)], bh
|
|
|
|
|
|
lea edx, [edx+(nf42_51-nf42_11)]
|
|
|
|
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_51-nf42_51)], bl
|
|
mov [edx+(nf42_52-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_53-nf42_51)], bl
|
|
mov [edx+(nf42_54-nf42_51)], bh
|
|
|
|
mov al, [esi+17]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_55-nf42_51)], bl
|
|
mov [edx+(nf42_56-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_57-nf42_51)], bl
|
|
mov [edx+(nf42_58-nf42_51)], bh
|
|
|
|
|
|
mov al, [esi+18]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_61-nf42_51)], bl
|
|
mov [edx+(nf42_62-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_63-nf42_51)], bl
|
|
mov [edx+(nf42_64-nf42_51)], bh
|
|
|
|
mov al, [esi+19]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_65-nf42_51)], bl
|
|
mov [edx+(nf42_66-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_67-nf42_51)], bl
|
|
mov [edx+(nf42_68-nf42_51)], bh
|
|
|
|
|
|
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_71-nf42_51)], bl
|
|
mov [edx+(nf42_72-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_73-nf42_51)], bl
|
|
mov [edx+(nf42_74-nf42_51)], bh
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_75-nf42_51)], bl
|
|
mov [edx+(nf42_76-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_77-nf42_51)], bl
|
|
mov [edx+(nf42_78-nf42_51)], bh
|
|
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_81-nf42_51)], bl
|
|
mov [edx+(nf42_82-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_83-nf42_51)], bl
|
|
mov [edx+(nf42_84-nf42_51)], bh
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_85-nf42_51)], bl
|
|
mov [edx+(nf42_86-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_87-nf42_51)], bl
|
|
mov [edx+(nf42_88-nf42_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width
|
|
jmp nf42_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf42_0:
|
|
nf42_11:mov al, bl
|
|
nf42_12:mov ah, bl
|
|
shl eax, 16
|
|
nf42_13:mov al, bl
|
|
nf42_14:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_15:mov al, bl
|
|
nf42_16:mov ah, bl
|
|
shl eax, 16
|
|
nf42_17:mov al, bl
|
|
nf42_18:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_21:mov al, bl
|
|
nf42_22:mov ah, bl
|
|
shl eax, 16
|
|
nf42_23:mov al, bl
|
|
nf42_24:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_25:mov al, bl
|
|
nf42_26:mov ah, bl
|
|
shl eax, 16
|
|
nf42_27:mov al, bl
|
|
nf42_28:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_31:mov al, bl
|
|
nf42_32:mov ah, bl
|
|
shl eax, 16
|
|
nf42_33:mov al, bl
|
|
nf42_34:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_35:mov al, bl
|
|
nf42_36:mov ah, bl
|
|
shl eax, 16
|
|
nf42_37:mov al, bl
|
|
nf42_38:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_41:mov al, bl
|
|
nf42_42:mov ah, bl
|
|
shl eax, 16
|
|
nf42_43:mov al, bl
|
|
nf42_44:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_45:mov al, bl
|
|
nf42_46:mov ah, bl
|
|
shl eax, 16
|
|
nf42_47:mov al, bl
|
|
nf42_48:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+12]
|
|
mov cx, [esi+14]
|
|
|
|
nf42_51:mov al, bl
|
|
nf42_52:mov ah, bl
|
|
shl eax, 16
|
|
nf42_53:mov al, bl
|
|
nf42_54:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_55:mov al, bl
|
|
nf42_56:mov ah, bl
|
|
shl eax, 16
|
|
nf42_57:mov al, bl
|
|
nf42_58:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_61:mov al, bl
|
|
nf42_62:mov ah, bl
|
|
shl eax, 16
|
|
nf42_63:mov al, bl
|
|
nf42_64:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_65:mov al, bl
|
|
nf42_66:mov ah, bl
|
|
shl eax, 16
|
|
nf42_67:mov al, bl
|
|
nf42_68:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_71:mov al, bl
|
|
nf42_72:mov ah, bl
|
|
shl eax, 16
|
|
nf42_73:mov al, bl
|
|
nf42_74:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_75:mov al, bl
|
|
nf42_76:mov ah, bl
|
|
shl eax, 16
|
|
nf42_77:mov al, bl
|
|
nf42_78:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_81:mov al, bl
|
|
nf42_82:mov ah, bl
|
|
shl eax, 16
|
|
nf42_83:mov al, bl
|
|
nf42_84:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_85:mov al, bl
|
|
nf42_86:mov ah, bl
|
|
shl eax, 16
|
|
nf42_87:mov al, bl
|
|
nf42_88:mov ah, bl
|
|
mov [edi+4], eax
|
|
|
|
add esi, 24
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf11:                                   ; 8x8x8 (64 bytes)
; Literal block: the 64 bytes at [esi] are copied unchanged, 8 bytes per
; row, into 8 consecutive rows starting at [edi].
;   In:    esi -> source bytes, edi -> first byte of the destination block
;   Out:   esi advanced by 64; edi = (last row written) - nfpk_back_right
;   Uses:  eax, edx, flags
if 0 ;debug
        add     esi, 64
        mov     eax, 0fefefefeH
;       mov     ebx, eax
        mov     ebx, 0
        jmp     nf_solid
endif
        mov     edx, nf_width           ; byte step from one row to the next

        ; rows 0 and 1
        mov     eax, [esi]
        mov     [edi], eax
        mov     eax, [esi+4]
        mov     [edi+4], eax
        mov     eax, [esi+8]
        mov     [edi+edx], eax
        mov     eax, [esi+12]
        mov     [edi+edx+4], eax
        lea     edi, [edi+edx*2]

        ; rows 2 and 3
        mov     eax, [esi+16]
        mov     [edi], eax
        mov     eax, [esi+20]
        mov     [edi+4], eax
        mov     eax, [esi+24]
        mov     [edi+edx], eax
        mov     eax, [esi+28]
        mov     [edi+edx+4], eax
        lea     edi, [edi+edx*2]

        ; rows 4 and 5
        mov     eax, [esi+32]
        mov     [edi], eax
        mov     eax, [esi+36]
        mov     [edi+4], eax
        mov     eax, [esi+40]
        mov     [edi+edx], eax
        mov     eax, [esi+44]
        mov     [edi+edx+4], eax
        lea     edi, [edi+edx*2]

        ; rows 6 and 7
        mov     eax, [esi+48]
        mov     [edi], eax
        mov     eax, [esi+52]
        mov     [edi+4], eax
        mov     eax, [esi+56]
        mov     [edi+edx], eax
        mov     eax, [esi+60]
        mov     [edi+edx+4], eax
        add     edi, edx                ; leave edi on the last row written

        add     esi, 64                 ; consume the block's data
        sub     edi, nfpk_back_right    ; reposition for the next block
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf12:                                   ; low 4x4x8 (16 bytes)
; Low-resolution literal block: 16 source bytes describe a 4x4 grid.
; Every source byte is written to a 2x2 group of destination bytes, so each
; source dword (4 bytes) fills two full 8-byte rows.
;   In:    esi -> 16 source bytes, edi -> first byte of the destination block
;   Out:   esi advanced by 16; edi = (last row written) - nfpk_back_right
;   Uses:  eax, ebx, edx, flags
        mov     edx, nf_width           ; byte step from one row to the next

        ; ---- source bytes 0..3 -> rows 0 and 1 ----
        mov     eax, [esi]
        mov     bh, ah                  ; second source byte, twice ...
        mov     bl, bh
        shl     ebx, 16                 ; ... into the upper half
        mov     bh, al                  ; first source byte, twice, below it
        mov     bl, bh
        mov     [edi], ebx
        mov     [edi+edx], ebx
        shr     eax, 16                 ; al/ah = third and fourth source byte
        mov     bh, ah
        mov     bl, bh
        shl     ebx, 16
        mov     bh, al
        mov     bl, bh
        mov     [edi+4], ebx
        mov     [edi+edx+4], ebx
        lea     edi, [edi+edx*2]

        ; ---- source bytes 4..7 -> rows 2 and 3 ----
        mov     eax, [esi+4]
        mov     bh, ah
        mov     bl, bh
        shl     ebx, 16
        mov     bh, al
        mov     bl, bh
        mov     [edi], ebx
        mov     [edi+edx], ebx
        shr     eax, 16
        mov     bh, ah
        mov     bl, bh
        shl     ebx, 16
        mov     bh, al
        mov     bl, bh
        mov     [edi+4], ebx
        mov     [edi+edx+4], ebx
        lea     edi, [edi+edx*2]

        ; ---- source bytes 8..11 -> rows 4 and 5 ----
        mov     eax, [esi+8]
        mov     bh, ah
        mov     bl, bh
        shl     ebx, 16
        mov     bh, al
        mov     bl, bh
        mov     [edi], ebx
        mov     [edi+edx], ebx
        shr     eax, 16
        mov     bh, ah
        mov     bl, bh
        shl     ebx, 16
        mov     bh, al
        mov     bl, bh
        mov     [edi+4], ebx
        mov     [edi+edx+4], ebx
        lea     edi, [edi+edx*2]

        ; ---- source bytes 12..15 -> rows 6 and 7 ----
        mov     eax, [esi+12]
        mov     bh, ah
        mov     bl, bh
        shl     ebx, 16
        mov     bh, al
        mov     bl, bh
        mov     [edi], ebx
        mov     [edi+edx], ebx
        shr     eax, 16
        mov     bh, ah
        mov     bl, bh
        shl     ebx, 16
        mov     bh, al
        mov     bl, bh
        mov     [edi+4], ebx
        mov     [edi+edx+4], ebx
        add     edi, edx                ; leave edi on the last row written

        sub     edi, nfpk_back_right    ; reposition for the next block
        add     esi, 16                 ; consume the block's data
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf13:                                   ; 2x2 4x4x0 (4 bytes)
; Four solid 4x4 quadrants, one source byte each:
;   [esi+0] rows 0-3 / columns 0-3      [esi+1] rows 0-3 / columns 4-7
;   [esi+2] rows 4-7 / columns 0-3      [esi+3] rows 4-7 / columns 4-7
;   In:    esi -> 4 source bytes, edi -> first byte of the destination block
;   Out:   esi advanced by 4; edi = (last row written) - nfpk_back_right
;   Uses:  eax, ebx, cx, edx, flags
        mov     edx, nf_width           ; byte step from one row to the next

        ; eax = byte 0 replicated four times
        mov     cl, [esi]
        mov     ch, cl
        mov     ax, cx
        shl     eax, 16
        mov     ax, cx

        ; ebx = byte 1 replicated four times
        mov     cl, [esi+1]
        mov     ch, cl
        mov     bx, cx
        shl     ebx, 16
        mov     bx, cx

        ; rows 0..3
        mov     [edi], eax
        mov     [edi+4], ebx
        add     edi, edx
        mov     [edi], eax
        mov     [edi+4], ebx
        add     edi, edx
        mov     [edi], eax
        mov     [edi+4], ebx
        add     edi, edx
        mov     [edi], eax
        mov     [edi+4], ebx
        add     edi, edx

        ; eax = byte 2 replicated four times
        mov     cl, [esi+2]
        mov     ch, cl
        mov     ax, cx
        shl     eax, 16
        mov     ax, cx

        ; ebx = byte 3 replicated four times
        mov     cl, [esi+3]
        mov     ch, cl
        mov     bx, cx
        shl     ebx, 16
        mov     bx, cx

        ; rows 4..7 (no step after the last one: edi stays on row 7)
        mov     [edi], eax
        mov     [edi+4], ebx
        add     edi, edx
        mov     [edi], eax
        mov     [edi+4], ebx
        add     edi, edx
        mov     [edi], eax
        mov     [edi+4], ebx
        add     edi, edx
        mov     [edi], eax
        mov     [edi+4], ebx

        sub     edi, nfpk_back_right    ; reposition for the next block
        add     esi, 4                  ; consume the block's data
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf14:                                   ; 8x8x0 (1 byte)
; Solid block: one source byte is replicated into every byte of eax and ebx
; and the shared fill routine nf_solid writes the rows (even rows from eax,
; odd rows from ebx -- identical here).
;   In:    esi -> 1 source byte, edi -> first byte of the destination block
;   Out:   esi advanced by 1; remaining state as left by nf_solid
;   Uses:  eax, ebx, flags (plus whatever nf_solid uses)
; Control leaves through the unconditional jump to nf_solid, whose own retn
; returns to the dispatcher; the unreachable retn that used to follow the
; jump has been dropped.
if 0 ;debug
        jmp     nf0
endif
        mov     bl, [esi]               ; Copy color into 8 positions
        inc     esi
        mov     bh, bl
        mov     eax, ebx
        shl     eax, 16
        mov     ax, bx                  ; eax = colour in all four bytes
        mov     ebx, eax
if 0 ;debug
        mov     eax, 080808080h
        mov     ebx, eax
endif
        jmp     nf_solid
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf15:                                   ; mix 8x8x0 (2 bytes)
; Two-colour checkerboard: the two source bytes alternate along each row,
; and odd rows use the same pair rotated by one byte.
;   In:    esi -> 2 source bytes, edi -> first byte of the destination block
;   Out:   esi advanced by 2; edi = (last row written) - nfpk_back_right
;   Uses:  eax, ebx, edx, flags
; Falls through into nf_solid, which is also entered from other handlers
; with eax (even rows) and ebx (odd rows) already prepared.
if 0 ;debug
        inc     esi
        jmp     nf0
endif
        mov     ax, [esi]               ; Copy 2 colors into 8 positions
        mov     bx, ax                  ; in a checkerboard
        shl     eax, 16
        mov     ax, bx                  ; eax = the pair repeated twice
        mov     ebx, eax
        rol     ebx, 8                  ; ebx = same pattern shifted one byte
        add     esi, 2
if 0 ;debug
        mov     eax, 080808080h
        mov     ebx, eax
endif

nf_solid:
; Fill the 8x8 block: rows 0,2,4,6 get eax in both dwords, rows 1,3,5,7 ebx.
        mov     edx, nf_width           ; byte step from one row to the next

        ; rows 0 and 1
        mov     [edi], eax
        mov     [edi+4], eax
        mov     [edi+edx], ebx
        mov     [edi+edx+4], ebx
        lea     edi, [edi+edx*2]

        ; rows 2 and 3
        mov     [edi], eax
        mov     [edi+4], eax
        mov     [edi+edx], ebx
        mov     [edi+edx+4], ebx
        lea     edi, [edi+edx*2]

        ; rows 4 and 5
        mov     [edi], eax
        mov     [edi+4], eax
        mov     [edi+edx], ebx
        mov     [edi+edx+4], ebx
        lea     edi, [edi+edx*2]

        ; rows 6 and 7
        mov     [edi], eax
        mov     [edi+4], eax
        mov     [edi+edx], ebx
        mov     [edi+edx+4], ebx
        add     edi, edx                ; leave edi on the last row written

        sub     edi, nfpk_back_right    ; reposition for the next block

        retn
|
|
|
|
nfPkDecomp ENDP
|
|
|
|
; Half vertical resolution version (skip odd lines)
;
; nfPkDecompH(ops, comp, x, y, w, h)
;   ops   -> opcode stream; each byte holds two 4-bit opcodes, low nibble first
;   comp  -> operand bytes consumed by the opcode handlers (kept in esi)
;   w     =  number of 8-pixel-wide cells per row (processed two per opcode byte)
;   h     =  number of cell rows
;   x, y  are not referenced in the code below; presumably NF_DECOMP_INIT
;         uses them (macro body not visible here) -- TODO confirm.
; Every handler (nf0..nf15) is entered with esi -> operands and edi -> the
; cell's top-left pixel, writes 4 rows of 8 pixels, leaves edi on the next
; cell and finishes with retn.
;
nfPkDecompH PROC USES ESI EDI EBX, \
		ops:PTRBYTE, comp:PTRBYTE, \
		x:DWORD, y:DWORD, w:DWORD, h:DWORD
	LOCAL tbuf: PTRBYTE
	LOCAL new_row:DWORD
	LOCAL DiffBufPtrs:DWORD

	LOCAL nfpk_back_right: DWORD
	LOCAL wcnt:DWORD

	LOG_LABEL "StartPkDecomp"

.data
; Handler address for each 4-bit opcode.
nfpk_OpTblH label dword
	dword offset nf0	; Prev Same (0)
	dword offset nf1	; No change (and copied to screen) (0)
	dword offset nf2	; Near shift from older part of current buf (1)
	dword offset nf3	; Near shift from newer part of current buf (1)
	dword offset nf4	; Near shift from previous buffer (1)
	dword offset nf5	; Far shift from previous buffer (2)
	dword offset nf6	; Far shift from current buffer (2)
				; [Or if COMPOPS, run of no changes (0)]
	dword offset nf7	; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
	dword offset nf8	; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
	dword offset nf9	; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
				; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
	dword offset nf10	; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 4x8x2 (24 bytes)
	dword offset nf11	; 8x8x8 (64 bytes)
	dword offset nf12	; low 4x4x8 (16 bytes)
	dword offset nf13	; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
	dword offset nf14	; 8x8x0 (1 byte)
	dword offset nf15	; mix 8x8x0 (2 bytes)
.code


	; Presumably initialises tbuf, DiffBufPtrs, nf_new_w and nf_new_h
	; (macro body not visible here) -- TODO confirm.
	NF_DECOMP_INIT 0

	; new_row = 4*nf_width - nf_new_w: added to edi at the end of each
	; row of cells to reach the start of the next one.
	mov eax, nf_width
	shl eax, 2
	sub eax, nf_new_w
	mov new_row, eax

	shr nf_new_h, 1

	; nfpk_back_right = 3*nf_width - SWIDTH.  A handler that has stepped
	; down 3 rows subtracts this to land on the top row, 8 pixels right.
	mov eax, nf_width
	lea eax, [eax*2+eax-SWIDTH]
	mov nfpk_back_right, eax

	mov esi, comp
	mov edi, tbuf
nf_StartRow:
	mov eax, w
	shr eax, 1
	mov wcnt,eax	; wcnt = opcode bytes (cell pairs) left in this row
	ALIGN 4
nf_NextPair:
	dec wcnt
	js nf_NextRow
	mov ebx, ops	; fetch next opcode byte
	mov al, [ebx]
	inc ebx
	mov ops, ebx

	xor ebx, ebx
	mov bl, al
	shr bl, 4	; ebx = high nibble (second cell)
	and eax, 0Fh	; eax = low nibble (first cell)
	; Build a return chain on the stack: the first handler's retn enters
	; the second handler, whose retn comes back to nf_NextPair.
	push offset nf_NextPair
	push nfpk_OpTblH[ebx*4]
	jmp nfpk_OpTblH[eax*4]

nf_NextRow:
	add edi, new_row
	dec h
	jnz nf_StartRow
	LOG_LABEL "EndPkDecomp"

	ret

;----------------------------------------
	ALIGN 4
; nf0: copy the cell from the same position in the other buffer.
; Loads eax = DiffBufPtrs as the source displacement and lets nf_shift copy.
nf0:	; No change from previous buffer
	mov eax, DiffBufPtrs
	jmp nf_shift

;----------------------------------------
	ALIGN 4
; nf1: leave the cell untouched; just step edi to the next cell.
nf1:	; No change (and copied to screen)
	add edi, SWIDTH
	retn

;----------------------------------------
	ALIGN 4
; nf2: copy the cell from a nearby position in the current buffer.
; In:   esi -> 1 operand byte, used as an index into nfpk_ShiftP2
;       (word entries: al = x offset, ah = y offset, both signed bytes)
; Out:  esi advanced by 1; tail-jumps to nf_shift with eax = displacement
nf2:	; Near shift from older part of current buffer
	xor eax, eax
	mov al, [esi]
	inc esi
	mov ax, nfpk_ShiftP2[eax*2]
; nf_xyc_shift: turn (al = x, ah = y) into a byte displacement inside the
; current buffer.  Also entered from nf3.
nf_xyc_shift:
	xor ebx, ebx
	mov bl, ah	; bl = y
	shl eax, 24
	sar eax, 24	; eax = sign-extended x
	add bl, 080h	; carry is set iff y is negative
	adc bl, 080h	; bl = y + (y < 0 ? 1 : 0)
	sar bl, 1	; bl = y/2 rounded toward zero (half vertical resolution)
	add eax, nfpk_ShiftY[ebx*4]	; table is indexed by the unsigned byte value of y/2
	jmp nf_shift

;----------------------------------------
	ALIGN 4
; nf3: like nf2, but with both offsets from nfpk_ShiftP2 negated.
; In:   esi -> 1 operand byte (table index)
; Out:  esi advanced by 1; continues at nf_xyc_shift with al = -x, ah = -y
nf3:	; Near shift from newer part of current buffer
	xor eax, eax
	mov al, [esi]
	inc esi
	mov ax, nfpk_ShiftP2[eax*2]
	neg al
	neg ah
	jmp nf_xyc_shift

;----------------------------------------
	ALIGN 4
; nf4: copy the cell from a nearby position in the previous buffer.
; In:   esi -> 1 operand byte, used as an index into nfpk_ShiftP1
; Out:  esi advanced by 1; continues at nf_xyp_shift with al = x, ah = y
nf4:	; Near shift from previous buffer
	xor eax, eax
	mov al, [esi]
	inc esi
	mov ax, nfpk_ShiftP1[eax*2]
	jmp nf_xyp_shift

;----------------------------------------
	ALIGN 4
; nf5: copy the cell from an arbitrary offset in the previous buffer.
; In:   esi -> 2 operand bytes: signed x, signed y
; Out:  esi advanced by 2; tail-jumps to nf_shift with eax = displacement
nf5:	; Far shift from previous buffer
	mov ax, [esi]
	add esi, 2
; nf_xyp_shift: same conversion as nf_xyc_shift, plus DiffBufPtrs so the
; source lies in the other buffer.  Also entered from nf4.
nf_xyp_shift:
	xor ebx, ebx
	mov bl, ah	; bl = y
	shl eax, 24
	sar eax, 24	; eax = sign-extended x
	add bl, 080h	; carry is set iff y is negative
	adc bl, 080h	; bl = y + (y < 0 ? 1 : 0)
	sar bl, 1	; bl = y/2 rounded toward zero
	add eax, nfpk_ShiftY[ebx*4]
	add eax, DiffBufPtrs
	jmp nf_shift

;----------------------------------------
	ALIGN 4

; nf6: skip a run of unchanged cells.
; In:   ebx = the other nibble k of the opcode byte (set by the dispatcher);
;       [esp] = handler address that the dispatcher pushed for that nibble
; Out:  edi advanced over k+2 pairs of cells, moving to the next row of
;       cells (and updating h and wcnt) whenever the current row runs out.
nf6:	; Run of no changes (must only appear in first nibble opcodes)
	; Next nibble k specifies 2k+4 squares with no changes
	add esp, 4	; Next nibble is not an opcode
	add ebx, 2	; (minimum of 4 squares)
	ALIGN 4
nf6a:	add edi, SWIDTH*2	; Advance over two squares
	dec ebx
	jz nf6z	; Last pair of squares
	dec wcnt	; Same row?
	jns nf6a	; Yes
	add edi, new_row	; Advance to next row
	dec h	; Decrement row count (should never become zero here)
	mov eax, w	; Reset wcnt
	shr eax ,1
	dec eax	; the first pair of the new row is consumed right away
	mov wcnt, eax
	jmp nf6a

nf6z:	retn	; back to nf_NextPair (the second-nibble handler was discarded)

;----------------------------------------
	ALIGN 4
; nf_shift: copy a 4-row x 8-byte cell from [edi+eax] to [edi].
; In:   eax = signed byte displacement from destination to source
;       edi -> top-left pixel of the destination cell
; Out:  edi -> top-left pixel of the next cell to the right; esi preserved
;       (parked in ebx during the copy); eax, ebx, edx clobbered
nf_shift:
	if 0 ;debug
	mov eax, 0
	mov ebx, eax
	jmp nf_solid
	endif
	mov ebx, esi	; save esi
	lea esi, [edi+eax]
	mov edx, nf_width

	REPEAT 3
	mov eax, [esi]
	mov [edi], eax
	mov eax, [esi+4]
	mov [edi+4], eax
	add esi, edx
	add edi, edx
	ENDM
	mov eax, [esi]	; fourth row: no pointer advance afterwards
	mov [edi], eax
	mov eax, [esi+4]
	mov [edi+4], eax

	sub edi, nfpk_back_right	; 3*nf_width-SWIDTH: up 3 rows, right 8 pixels
	mov esi, ebx	; restore esi
	retn

;----------------------------------------
	ALIGN 4
; nf7: two-colour cell, one bit per pixel, half-height variant.
; In:   esi -> c0, c1, then 8 bitmap bytes (one per source row);
;       edi -> top-left pixel of the cell
;       If c0 > c1 (unsigned) the data is the 4-byte low-res form -> nf23.
; Out:  esi advanced by 10, edi -> next cell; eax, ebx, ecx, edx clobbered.
; Only the bitmap bytes of rows 0, 2, 4, 6 (esi+2, +4, +6, +8) are used.
; SELF-MODIFYING: for each used bitmap byte the dword nfpk_mov8[byte] is
; split into 4 bytes that overwrite byte +2 of the four "mov ax, bx"
; instructions of that row (presumably the ModRM byte, choosing bx/dx/cx/bp
; as the source of each pixel pair -- table not visible here).  The code
; section therefore has to be writable.
nf7:	; 8x8x1 (10 bytes)

	mov ax, [esi]
	cmp al, ah
	ja nf23

	if 0 ;debug
	add esi, 10
	mov eax, 0fefefefeH
	mov ebx, eax
	jmp nf_solid
	endif
	xor eax, eax
	lea ecx, nfpk_mov8
	lea edx, byte ptr ds:nf7_11+2

	mov al, [esi+2]	; source row 0 -> output row 0
	mov ebx, [ecx+eax*4]
	mov [edx+(nf7_11-nf7_11)], bl
	mov [edx+(nf7_12-nf7_11)], bh
	shr ebx, 16
	mov [edx+(nf7_13-nf7_11)], bl
	mov [edx+(nf7_14-nf7_11)], bh

	mov al, [esi+4]	; source row 2 -> output row 1
	mov ebx, [ecx+eax*4]
	mov [edx+(nf7_31-nf7_11)], bl
	mov [edx+(nf7_32-nf7_11)], bh
	shr ebx, 16
	mov [edx+(nf7_33-nf7_11)], bl
	mov [edx+(nf7_34-nf7_11)], bh

	lea edx, [edx+(nf7_51-nf7_11)]	; rebase the patch pointer

	mov al, [esi+6]	; source row 4 -> output row 2
	mov ebx, [ecx+eax*4]
	mov [edx+(nf7_51-nf7_51)], bl
	mov [edx+(nf7_52-nf7_51)], bh
	shr ebx, 16
	mov [edx+(nf7_53-nf7_51)], bl
	mov [edx+(nf7_54-nf7_51)], bh

	mov al, [esi+8]	; source row 6 -> output row 3
	mov ebx, [ecx+eax*4]
	mov [edx+(nf7_71-nf7_51)], bl
	mov [edx+(nf7_72-nf7_51)], bh
	shr ebx, 16
	mov [edx+(nf7_73-nf7_51)], bl
	mov [edx+(nf7_74-nf7_51)], bh

	push ebp
	push esi
	; load bx,dx,cx,bp with 00,01,10,11 color combinations
	; (note that bits are read least significant first).
	mov cx, [esi]
	mov esi,nf_width	; esi = row stride while painting
	mov bl,cl
	mov bh,cl
	mov dl,ch
	mov dh,cl
	mov al,ch
	mov ah,ch
	mov ebp,eax
	jmp nf7_0	; flush prefetch
	ALIGN 4
nf7_0:
nf7_11:	mov ax, bx
	shl eax, 16
nf7_12:	mov ax, bx
	mov [edi], eax
nf7_13:	mov ax, bx
	shl eax, 16
nf7_14:	mov ax, bx
	mov [edi+4], eax
	add edi, esi

nf7_31:	mov ax, bx
	shl eax, 16
nf7_32:	mov ax, bx
	mov [edi], eax
nf7_33:	mov ax, bx
	shl eax, 16
nf7_34:	mov ax, bx
	mov [edi+4], eax
	add edi, esi

nf7_51:	mov ax, bx
	shl eax, 16
nf7_52:	mov ax, bx
	mov [edi], eax
nf7_53:	mov ax, bx
	shl eax, 16
nf7_54:	mov ax, bx
	mov [edi+4], eax
	add edi, esi

nf7_71:	mov ax, bx
	shl eax, 16
nf7_72:	mov ax, bx
	mov [edi], eax
nf7_73:	mov ax, bx
	shl eax, 16
nf7_74:	mov ax, bx
	mov [edi+4], eax

	pop esi
	pop ebp
	add esi, 10
	sub edi, nfpk_back_right	; 3*nf_width-SWIDTH: up 3 rows, right 8 pixels

	retn

;----------------------------------------
	ALIGN 4
;nf7+16
; nf23: two-colour cell at low resolution, half-height variant.
; Reached from nf7 when c0 > c1.
; In:   esi -> c0, c1, then 2 bitmap bytes; each nibble describes one
;       output row (low nibble of esi+2 first); edi -> top-left pixel
; Out:  esi advanced by 4, edi -> next cell; eax, ebx, ecx, edx clobbered.
; SELF-MODIFYING: the dword nfpk_mov4l[nibble] supplies the 4 bytes written
; to byte +2 of the four "mov ax, bx" instructions of the row (presumably
; ModRM bytes selecting bx or cx -- table not visible here).
nf23:	; low 4x4x1 (4 bytes)

	xor eax, eax
	lea ecx, nfpk_mov4l
	lea edx, byte ptr ds:nf23_11+2

	mov al, [esi+2]
	and al, 0fH
	mov ebx, [ecx+eax*4]
	mov [edx+(nf23_11-nf23_11)], bl
	mov [edx+(nf23_12-nf23_11)], bh
	shr ebx, 16
	mov [edx+(nf23_13-nf23_11)], bl
	mov [edx+(nf23_14-nf23_11)], bh

	mov al, [esi+2]
	shr al, 4
	mov ebx, [ecx+eax*4]
	mov [edx+(nf23_31-nf23_11)], bl
	mov [edx+(nf23_32-nf23_11)], bh
	shr ebx, 16
	mov [edx+(nf23_33-nf23_11)], bl
	mov [edx+(nf23_34-nf23_11)], bh


	mov al, [esi+3]
	and al, 0fH
	mov ebx, [ecx+eax*4]
	mov [edx+(nf23_51-nf23_11)], bl
	mov [edx+(nf23_52-nf23_11)], bh
	shr ebx, 16
	mov [edx+(nf23_53-nf23_11)], bl
	mov [edx+(nf23_54-nf23_11)], bh

	mov al, [esi+3]
	shr al, 4
	mov ebx, [ecx+eax*4]
	mov [edx+(nf23_71-nf23_11)], bl
	mov [edx+(nf23_72-nf23_11)], bh
	shr ebx, 16
	mov [edx+(nf23_73-nf23_11)], bl
	mov [edx+(nf23_74-nf23_11)], bh

	mov edx, nf_width	; edx = row stride while painting

	; load bx,cx with 00,11 color combinations
	mov bx, [esi]
	mov cl, bh
	mov bh, bl
	mov ch, cl

	jmp nf23_0	; flush prefetch
	ALIGN 4
nf23_0:

nf23_11:mov ax, bx
	shl eax, 16
nf23_12:mov ax, bx
	mov [edi], eax

nf23_13:mov ax, bx
	shl eax, 16
nf23_14:mov ax, bx
	mov [edi+4], eax
	add edi, edx

nf23_31:mov ax, bx
	shl eax, 16
nf23_32:mov ax, bx
	mov [edi], eax

nf23_33:mov ax, bx
	shl eax, 16
nf23_34:mov ax, bx
	mov [edi+4], eax
	add edi, edx

nf23_51:mov ax, bx
	shl eax, 16
nf23_52:mov ax, bx
	mov [edi], eax

nf23_53:mov ax, bx
	shl eax, 16
nf23_54:mov ax, bx
	mov [edi+4], eax
	add edi, edx

nf23_71:mov ax, bx
	shl eax, 16
nf23_72:mov ax, bx
	mov [edi], eax

nf23_73:mov ax, bx
	shl eax, 16
nf23_74:mov ax, bx
	mov [edi+4], eax

	sub edi, nfpk_back_right	; 3*nf_width-SWIDTH: up 3 rows, right 8 pixels
	add esi, 4
	retn

;----------------------------------------
	ALIGN 4
; nf8: four quadrants, each with its own colour pair and 1-bit bitmap;
; half-height variant.
; In:   esi -> 4 groups of (c0, c1, 2 bitmap bytes); edi -> top-left pixel
;       If the first c0 > c1 (unsigned) the data is one of the 12-byte
;       forms -> nf24.
; Out:  esi advanced by 16, edi -> next cell; eax, ebx, ecx, edx clobbered.
; Paint order: 4 rows of the left 4-pixel column (groups 0 and 1), then
; 4 rows of the right column (groups 2 and 3).  Only bl/bh of each
; nfpk_mov8 entry are used, i.e. the first 4 pixels of each bitmap byte.
; SELF-MODIFYING: byte +2 of every labelled "mov ax, bx" is rewritten.
nf8:	; 2x2 4x4x1 (16 bytes)

	mov ax, [esi]
	cmp al, ah
	ja nf24

	xor eax, eax
	lea ecx, nfpk_mov8
	lea edx, byte ptr ds:nf8_11+2

	mov al, [esi+2]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf8_11-nf8_11)], bl
	mov [edx+(nf8_12-nf8_11)], bh

	mov al, [esi+3]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf8_21-nf8_11)], bl
	mov [edx+(nf8_22-nf8_11)], bh


	mov al, [esi+6]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf8_31-nf8_11)], bl
	mov [edx+(nf8_32-nf8_11)], bh

	mov al, [esi+7]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf8_41-nf8_11)], bl
	mov [edx+(nf8_42-nf8_11)], bh

	add edx, nf8_51-nf8_11	; rebase the patch pointer

	mov al, [esi+10]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf8_51-nf8_51)], bl
	mov [edx+(nf8_52-nf8_51)], bh

	mov al, [esi+11]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf8_61-nf8_51)], bl
	mov [edx+(nf8_62-nf8_51)], bh


	mov al, [esi+14]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf8_71-nf8_51)], bl
	mov [edx+(nf8_72-nf8_51)], bh

	mov al, [esi+15]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf8_81-nf8_51)], bl
	mov [edx+(nf8_82-nf8_51)], bh


	push ebp
	push esi	; [esp] = operand pointer, re-read for each colour pair
	; load bx,dx,cx,bp with 00,01,10,11 color combinations
	; (note that bits are read least significant first).
	mov cx, [esi]
	mov esi, nf_width	; esi = row stride while painting
	mov bl,cl
	mov bh,cl
	mov dl,ch
	mov dh,cl
	mov al,ch
	mov ah,ch
	mov ebp,eax

	jmp nf8_0	; flush prefetch
	ALIGN 4
nf8_0:
nf8_11:	mov ax, bx
	shl eax, 16
nf8_12:	mov ax, bx
	mov [edi], eax
	add edi, esi

nf8_21:	mov ax, bx
	shl eax, 16
nf8_22:	mov ax, bx
	mov [edi], eax
	add edi, esi

	mov eax, [esp]	; colour pair of group 1
	mov cx, [eax+4]
	mov bl,cl
	mov bh,cl
	mov dl,ch
	mov dh,cl
	mov al,ch
	mov ah,ch
	mov ebp,eax

nf8_31:	mov ax, bx
	shl eax, 16
nf8_32:	mov ax, bx
	mov [edi], eax
	add edi, esi

nf8_41:	mov ax, bx
	shl eax, 16
nf8_42:	mov ax, bx
	mov [edi], eax
	add edi, esi

	lea eax, [esi*4-4]	; back up 4 rows and step 4 pixels right
	sub edi, eax

	mov eax, [esp]	; colour pair of group 2
	mov cx, [eax+8]
	mov bl,cl
	mov bh,cl
	mov dl,ch
	mov dh,cl
	mov al,ch
	mov ah,ch
	mov ebp,eax

nf8_51:	mov ax, bx
	shl eax, 16
nf8_52:	mov ax, bx
	mov [edi], eax
	add edi, esi

nf8_61:	mov ax, bx
	shl eax, 16
nf8_62:	mov ax, bx
	mov [edi], eax
	add edi, esi

	mov eax, [esp]	; colour pair of group 3
	mov cx, [eax+12]
	mov bl,cl
	mov bh,cl
	mov dl,ch
	mov dh,cl
	mov al,ch
	mov ah,ch
	mov ebp,eax

nf8_71:	mov ax, bx
	shl eax, 16
nf8_72:	mov ax, bx
	mov [edi], eax
	add edi, esi

nf8_81:	mov ax, bx
	shl eax, 16
nf8_82:	mov ax, bx
	mov [edi], eax

	pop esi
	pop ebp
	add esi, 16
	sub edi, 4	; undo the 4-pixel step to the right column
	sub edi, nfpk_back_right	; 3*nf_width-SWIDTH: up 3 rows, right 8 pixels

	retn

;----------------------------------------
	ALIGN 4
;nf8+16
; nf24: left and right halves each have a colour pair and a 1-bit bitmap;
; half-height variant.  Reached from nf8 when its first c0 > c1.
; In:   esi -> 2 groups of (c0, c1, 4 bitmap bytes); edi -> top-left pixel
;       If the second group's c0 > c1 (unsigned) the layout is 1x2 -> nf40.
; Out:  esi advanced by 12, edi -> next cell; eax, ebx, ecx, edx clobbered.
; Only bl/bh of each nfpk_mov8 entry are used (first 4 pixels per byte).
; SELF-MODIFYING: byte +2 of every labelled "mov ax, bx" is rewritten.
nf24:	; 2x1 4x8x1 (12 bytes)

	mov ax, [esi+6]
	cmp al, ah
	ja nf40

	xor eax, eax
	lea ecx, nfpk_mov8
	lea edx, byte ptr ds:nf24_11+2

	mov al, [esi+2]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf24_11-nf24_11)], bl
	mov [edx+(nf24_12-nf24_11)], bh

	mov al, [esi+3]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf24_21-nf24_11)], bl
	mov [edx+(nf24_22-nf24_11)], bh

	mov al, [esi+4]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf24_31-nf24_11)], bl
	mov [edx+(nf24_32-nf24_11)], bh

	mov al, [esi+5]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf24_41-nf24_11)], bl
	mov [edx+(nf24_42-nf24_11)], bh

	add edx, nf24_51-nf24_11	; rebase the patch pointer

	mov al, [esi+8]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf24_51-nf24_51)], bl
	mov [edx+(nf24_52-nf24_51)], bh

	mov al, [esi+9]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf24_61-nf24_51)], bl
	mov [edx+(nf24_62-nf24_51)], bh

	mov al, [esi+10]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf24_71-nf24_51)], bl
	mov [edx+(nf24_72-nf24_51)], bh

	mov al, [esi+11]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf24_81-nf24_51)], bl
	mov [edx+(nf24_82-nf24_51)], bh

	push ebp
	push esi	; [esp] = operand pointer, re-read for the second colour pair
	; load bx,dx,cx,bp with 00,01,10,11 color combinations
	; (note that bits are read least significant first).
	mov cx, [esi]
	mov esi, nf_width	; esi = row stride while painting
	mov bl,cl
	mov bh,cl
	mov dl,ch
	mov dh,cl
	mov al,ch
	mov ah,ch
	mov ebp,eax

	jmp nf24_0	; flush prefetch
	ALIGN 4
nf24_0:
nf24_11:mov ax, bx
	shl eax, 16
nf24_12:mov ax, bx
	mov [edi], eax
	add edi, esi

nf24_21:mov ax, bx
	shl eax, 16
nf24_22:mov ax, bx
	mov [edi], eax
	add edi, esi

nf24_31:mov ax, bx
	shl eax, 16
nf24_32:mov ax, bx
	mov [edi], eax
	add edi, esi

nf24_41:mov ax, bx
	shl eax, 16
nf24_42:mov ax, bx
	mov [edi], eax
	add edi, esi

	lea eax, [esi*4-4]	; back up 4 rows and step 4 pixels right
	sub edi, eax

	mov eax, [esp]	; colour pair of the right half
	mov cx, [eax+6]
	mov bl,cl
	mov bh,cl
	mov dl,ch
	mov dh,cl
	mov al,ch
	mov ah,ch
	mov ebp,eax

nf24_51:mov ax, bx
	shl eax, 16
nf24_52:mov ax, bx
	mov [edi], eax
	add edi, esi

nf24_61:mov ax, bx
	shl eax, 16
nf24_62:mov ax, bx
	mov [edi], eax
	add edi, esi

nf24_71:mov ax, bx
	shl eax, 16
nf24_72:mov ax, bx
	mov [edi], eax
	add edi, esi

nf24_81:mov ax, bx
	shl eax, 16
nf24_82:mov ax, bx
	mov [edi], eax

	pop esi
	pop ebp
	add esi, 12
	sub edi, 4	; undo the 4-pixel step to the right half
	sub edi, nfpk_back_right	; 3*nf_width-SWIDTH: up 3 rows, right 8 pixels

	retn

;----------------------------------------
	ALIGN 4
;nf8+32
; nf40: top and bottom halves each have a colour pair and a 1-bit bitmap;
; half-height variant.  Reached from nf24.
; In:   esi -> 2 groups of (c0, c1, 4 bitmap bytes, one per source row);
;       edi -> top-left pixel of the cell
; Out:  esi advanced by 12, edi -> next cell; eax, ebx, ecx, edx clobbered.
; Only every other bitmap byte is used (esi+2, +4 and esi+8, +10).
; SELF-MODIFYING: byte +2 of every labelled "mov ax, bx" is rewritten.
nf40:	; 1x2 8x4x1 (12 bytes)
	xor eax, eax
	lea ecx, nfpk_mov8
	lea edx, byte ptr ds:nf40_11+2

	mov al, [esi+2]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf40_11-nf40_11)], bl
	mov [edx+(nf40_12-nf40_11)], bh
	shr ebx, 16
	mov [edx+(nf40_13-nf40_11)], bl
	mov [edx+(nf40_14-nf40_11)], bh

	mov al, [esi+4]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf40_31-nf40_11)], bl
	mov [edx+(nf40_32-nf40_11)], bh
	shr ebx, 16
	mov [edx+(nf40_33-nf40_11)], bl
	mov [edx+(nf40_34-nf40_11)], bh

	add edx, nf40_51-nf40_11	; rebase the patch pointer

	mov al, [esi+8]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf40_51-nf40_51)], bl
	mov [edx+(nf40_52-nf40_51)], bh
	shr ebx, 16
	mov [edx+(nf40_53-nf40_51)], bl
	mov [edx+(nf40_54-nf40_51)], bh

	mov al, [esi+10]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf40_71-nf40_51)], bl
	mov [edx+(nf40_72-nf40_51)], bh
	shr ebx, 16
	mov [edx+(nf40_73-nf40_51)], bl
	mov [edx+(nf40_74-nf40_51)], bh

	push ebp
	push esi	; [esp] = operand pointer, re-read for the second colour pair
	; load bx,dx,cx,bp with 00,01,10,11 color combinations
	; (note that bits are read least significant first).
	mov cx, [esi]
	mov esi, nf_width	; esi = row stride while painting
	mov bl,cl
	mov bh,cl
	mov dl,ch
	mov dh,cl
	mov al,ch
	mov ah,ch
	mov ebp,eax

	jmp nf40_0	; flush prefetch
	ALIGN 4
nf40_0:
nf40_11:mov ax, bx
	shl eax, 16
nf40_12:mov ax, bx
	mov [edi], eax
nf40_13:mov ax, bx
	shl eax, 16
nf40_14:mov ax, bx
	mov [edi+4], eax
	add edi, esi

nf40_31:mov ax, bx
	shl eax, 16
nf40_32:mov ax, bx
	mov [edi], eax
nf40_33:mov ax, bx
	shl eax, 16
nf40_34:mov ax, bx
	mov [edi+4], eax
	add edi, esi

	mov eax, [esp]	; colour pair of the bottom half
	mov cx, [eax+6]
	mov bl,cl
	mov bh,cl
	mov dl,ch
	mov dh,cl
	mov al,ch
	mov ah,ch
	mov ebp,eax

nf40_51:mov ax, bx
	shl eax, 16
nf40_52:mov ax, bx
	mov [edi], eax
nf40_53:mov ax, bx
	shl eax, 16
nf40_54:mov ax, bx
	mov [edi+4], eax
	add edi, esi

nf40_71:mov ax, bx
	shl eax, 16
nf40_72:mov ax, bx
	mov [edi], eax
nf40_73:mov ax, bx
	shl eax, 16
nf40_74:mov ax, bx
	mov [edi+4], eax

	pop esi
	pop ebp
	add esi, 12
	sub edi, nfpk_back_right	; 3*nf_width-SWIDTH: up 3 rows, right 8 pixels

	retn

;----------------------------------------
	ALIGN 4
; nf9: four-colour cell, two bits per pixel, half-height variant.
; In:   esi -> c0..c3, then 16 bitmap bytes (2 per source row);
;       edi -> top-left pixel of the cell
;       c0 > c1 (unsigned) -> nf41 (which may go on to nf57);
;       otherwise c2 > c3 -> nf25 (low-resolution forms).
; Out:  esi advanced by 20, edi -> next cell; eax, ebx, ecx, edx clobbered.
; Only the bitmap bytes of rows 0, 2, 4, 6 are used (esi+4/5, +8/9,
; +12/13, +16/17).
; SELF-MODIFYING: each dword nfpk_mov4[byte] supplies 4 bytes written to
; byte +1 of four consecutive "mov al/ah, bl" instructions (presumably the
; ModRM byte, choosing bl/bh/cl/ch as the pixel colour -- table not
; visible here).
nf9:	; 8x8x2 (20 bytes)

	mov eax, [esi]
	cmp al, ah
	ja nf41

	shr eax, 16
	cmp al, ah
	ja nf25

	xor eax, eax
	lea ecx, nfpk_mov4
	lea edx, byte ptr ds:nf9_11+1

	mov al, [esi+4]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf9_11-nf9_11)], bl
	mov [edx+(nf9_12-nf9_11)], bh
	shr ebx, 16
	mov [edx+(nf9_13-nf9_11)], bl
	mov [edx+(nf9_14-nf9_11)], bh

	mov al, [esi+5]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf9_15-nf9_11)], bl
	mov [edx+(nf9_16-nf9_11)], bh
	shr ebx, 16
	mov [edx+(nf9_17-nf9_11)], bl
	mov [edx+(nf9_18-nf9_11)], bh


	mov al, [esi+8]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf9_31-nf9_11)], bl
	mov [edx+(nf9_32-nf9_11)], bh
	shr ebx, 16
	mov [edx+(nf9_33-nf9_11)], bl
	mov [edx+(nf9_34-nf9_11)], bh

	mov al, [esi+9]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf9_35-nf9_11)], bl
	mov [edx+(nf9_36-nf9_11)], bh
	shr ebx, 16
	mov [edx+(nf9_37-nf9_11)], bl
	mov [edx+(nf9_38-nf9_11)], bh

	lea edx, [edx+(nf9_51-nf9_11)]	; rebase the patch pointer

	mov al, [esi+12]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf9_51-nf9_51)], bl
	mov [edx+(nf9_52-nf9_51)], bh
	shr ebx, 16
	mov [edx+(nf9_53-nf9_51)], bl
	mov [edx+(nf9_54-nf9_51)], bh

	mov al, [esi+13]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf9_55-nf9_51)], bl
	mov [edx+(nf9_56-nf9_51)], bh
	shr ebx, 16
	mov [edx+(nf9_57-nf9_51)], bl
	mov [edx+(nf9_58-nf9_51)], bh


	mov al, [esi+16]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf9_71-nf9_51)], bl
	mov [edx+(nf9_72-nf9_51)], bh
	shr ebx, 16
	mov [edx+(nf9_73-nf9_51)], bl
	mov [edx+(nf9_74-nf9_51)], bh

	mov al, [esi+17]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf9_75-nf9_51)], bl
	mov [edx+(nf9_76-nf9_51)], bh
	shr ebx, 16
	mov [edx+(nf9_77-nf9_51)], bl
	mov [edx+(nf9_78-nf9_51)], bh

	; Load bl,bh,cl,ch with four colors
	mov bx, [esi]
	mov cx, [esi+2]

	mov edx, nf_width	; edx = row stride while painting
	jmp nf9_0	; flush prefetch
	ALIGN 4
nf9_0:
nf9_11:	mov al, bl
nf9_12:	mov ah, bl
	shl eax, 16
nf9_13:	mov al, bl
nf9_14:	mov ah, bl
	mov [edi], eax

nf9_15:	mov al, bl
nf9_16:	mov ah, bl
	shl eax, 16
nf9_17:	mov al, bl
nf9_18:	mov ah, bl
	mov [edi+4], eax
	add edi, edx

nf9_31:	mov al, bl
nf9_32:	mov ah, bl
	shl eax, 16
nf9_33:	mov al, bl
nf9_34:	mov ah, bl
	mov [edi], eax

nf9_35:	mov al, bl
nf9_36:	mov ah, bl
	shl eax, 16
nf9_37:	mov al, bl
nf9_38:	mov ah, bl
	mov [edi+4], eax
	add edi, edx

nf9_51:	mov al, bl
nf9_52:	mov ah, bl
	shl eax, 16
nf9_53:	mov al, bl
nf9_54:	mov ah, bl
	mov [edi], eax

nf9_55:	mov al, bl
nf9_56:	mov ah, bl
	shl eax, 16
nf9_57:	mov al, bl
nf9_58:	mov ah, bl
	mov [edi+4], eax
	add edi, edx

nf9_71:	mov al, bl
nf9_72:	mov ah, bl
	shl eax, 16
nf9_73:	mov al, bl
nf9_74:	mov ah, bl
	mov [edi], eax

nf9_75:	mov al, bl
nf9_76:	mov ah, bl
	shl eax, 16
nf9_77:	mov al, bl
nf9_78:	mov ah, bl
	mov [edi+4], eax

	add esi, 20
	sub edi, nfpk_back_right	; 3*nf_width-SWIDTH: up 3 rows, right 8 pixels
	retn

;----------------------------------------
	ALIGN 4
;nf9+16
; nf25: four-colour cell at low resolution, half-height variant.
; Reached from nf9 when c2 > c3.
; In:   esi -> c0..c3, then 4 bitmap bytes (one per output row, 4 pixels
;       each, every pixel written twice horizontally); edi -> top-left pixel
; Out:  esi advanced by 8, edi -> next cell; eax, ebx, ecx, edx clobbered.
; SELF-MODIFYING: the 4 bytes of nfpk_mov4[byte] are written, in reverse
; label order (_x4, _x3, _x2, _x1), to byte +1 of the row's patched
; "mov al/ah, bl" instructions.
nf25:	; low 4x4x2 (8 bytes)

	if 0 ;debug
	mov eax, 0
	mov ebx, 0
	add esi, 8
	jmp nf_solid
	endif

	xor eax, eax
	lea ecx, nfpk_mov4
	lea edx, byte ptr ds:nf25_11+1

	mov al, [esi+4]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf25_14-nf25_11)], bl
	mov [edx+(nf25_13-nf25_11)], bh
	shr ebx, 16
	mov [edx+(nf25_12-nf25_11)], bl
	mov [edx+(nf25_11-nf25_11)], bh

	mov al, [esi+5]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf25_24-nf25_11)], bl
	mov [edx+(nf25_23-nf25_11)], bh
	shr ebx, 16
	mov [edx+(nf25_22-nf25_11)], bl
	mov [edx+(nf25_21-nf25_11)], bh


	mov al, [esi+6]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf25_34-nf25_11)], bl
	mov [edx+(nf25_33-nf25_11)], bh
	shr ebx, 16
	mov [edx+(nf25_32-nf25_11)], bl
	mov [edx+(nf25_31-nf25_11)], bh

	mov al, [esi+7]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf25_44-nf25_11)], bl
	mov [edx+(nf25_43-nf25_11)], bh
	shr ebx, 16
	mov [edx+(nf25_42-nf25_11)], bl
	mov [edx+(nf25_41-nf25_11)], bh

	; Load bl,bh,cl,ch with four colors
	mov bx, [esi]
	mov cx, [esi+2]

	mov edx, nf_width	; edx = row stride while painting
	jmp nf25_0	; flush prefetch
	ALIGN 4
nf25_0:
nf25_11:mov ah, bl
	mov al, ah
	shl eax, 16
nf25_12:mov al, bl
	mov ah, al
	mov [edi], eax
nf25_13:mov ah, bl
	mov al, ah
	shl eax, 16
nf25_14:mov al, bl
	mov ah, al
	mov [edi+4], eax
	add edi, edx

nf25_21:mov ah, bl
	mov al, ah
	shl eax, 16
nf25_22:mov al, bl
	mov ah, al
	mov [edi], eax
nf25_23:mov ah, bl
	mov al, ah
	shl eax, 16
nf25_24:mov al, bl
	mov ah, al
	mov [edi+4], eax
	add edi, edx

nf25_31:mov ah, bl
	mov al, ah
	shl eax, 16
nf25_32:mov al, bl
	mov ah, al
	mov [edi], eax
nf25_33:mov ah, bl
	mov al, ah
	shl eax, 16
nf25_34:mov al, bl
	mov ah, al
	mov [edi+4], eax
	add edi, edx

nf25_41:mov ah, bl
	mov al, ah
	shl eax, 16
nf25_42:mov al, bl
	mov ah, al
	mov [edi], eax
nf25_43:mov ah, bl
	mov al, ah
	shl eax, 16
nf25_44:mov al, bl
	mov ah, al
	mov [edi+4], eax

	add esi, 8
	sub edi, nfpk_back_right	; 3*nf_width-SWIDTH: up 3 rows, right 8 pixels

	retn

;----------------------------------------
	ALIGN 4
;nf9+32
; nf41: four-colour cell, horizontally doubled pixels, half-height variant.
; Reached from nf9 when c0 > c1, with eax still holding c0..c3.
; In:   esi -> c0..c3, then 8 bitmap bytes (one per source row, 4 pixels
;       each); edi -> top-left pixel of the cell
;       If c2 > c3 (unsigned) the layout is 8x4 instead -> nf57.
; Out:  esi advanced by 12, edi -> next cell; eax, ebx, ecx, edx clobbered.
; Only the bitmap bytes of rows 0, 2, 4, 6 (esi+4, +6, +8, +10) are used.
; SELF-MODIFYING: byte +1 of every labelled "mov al/ah, bl" is rewritten.
nf41:	; low 4x8x2 (12 bytes)
	shr eax, 16
	cmp al, ah
	ja nf57

	xor eax, eax
	lea ecx, nfpk_mov4
	lea edx, byte ptr ds:nf41_11+1

	mov al, [esi+4]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf41_14-nf41_11)], bl
	mov [edx+(nf41_13-nf41_11)], bh
	shr ebx, 16
	mov [edx+(nf41_12-nf41_11)], bl
	mov [edx+(nf41_11-nf41_11)], bh

	mov al, [esi+6]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf41_34-nf41_11)], bl
	mov [edx+(nf41_33-nf41_11)], bh
	shr ebx, 16
	mov [edx+(nf41_32-nf41_11)], bl
	mov [edx+(nf41_31-nf41_11)], bh

	lea edx, [edx+(nf41_51-nf41_11)]	; rebase the patch pointer

	mov al, [esi+8]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf41_54-nf41_51)], bl
	mov [edx+(nf41_53-nf41_51)], bh
	shr ebx, 16
	mov [edx+(nf41_52-nf41_51)], bl
	mov [edx+(nf41_51-nf41_51)], bh

	mov al, [esi+10]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf41_74-nf41_51)], bl
	mov [edx+(nf41_73-nf41_51)], bh
	shr ebx, 16
	mov [edx+(nf41_72-nf41_51)], bl
	mov [edx+(nf41_71-nf41_51)], bh

	; Load bl,bh,cl,ch with four colors
	mov bx, [esi]
	mov cx, [esi+2]

	mov edx, nf_width	; edx = row stride while painting
	jmp nf41_0	; flush prefetch
	ALIGN 4
nf41_0:
nf41_11:mov ah, bl
	mov al, ah
	shl eax, 16
nf41_12:mov al, bl
	mov ah, al
	mov [edi], eax
nf41_13:mov ah, bl
	mov al, ah
	shl eax, 16
nf41_14:mov al, bl
	mov ah, al
	mov [edi+4], eax
	add edi, edx

nf41_31:mov ah, bl
	mov al, ah
	shl eax, 16
nf41_32:mov al, bl
	mov ah, al
	mov [edi], eax
nf41_33:mov ah, bl
	mov al, ah
	shl eax, 16
nf41_34:mov al, bl
	mov ah, al
	mov [edi+4], eax
	add edi, edx

nf41_51:mov ah, bl
	mov al, ah
	shl eax, 16
nf41_52:mov al, bl
	mov ah, al
	mov [edi], eax
nf41_53:mov ah, bl
	mov al, ah
	shl eax, 16
nf41_54:mov al, bl
	mov ah, al
	mov [edi+4], eax
	add edi, edx

nf41_71:mov ah, bl
	mov al, ah
	shl eax, 16
nf41_72:mov al, bl
	mov ah, al
	mov [edi], eax
nf41_73:mov ah, bl
	mov al, ah
	shl eax, 16
nf41_74:mov al, bl
	mov ah, al
	mov [edi+4], eax

	add esi, 12
	sub edi, nfpk_back_right	; 3*nf_width-SWIDTH: up 3 rows, right 8 pixels

	retn

;----------------------------------------
	ALIGN 4
;nf9+48
; nf57: four-colour cell, 8 pixels wide by 4 source rows, half-height variant.
; Reached from nf41.
; In:   esi -> c0..c3, then 8 bitmap bytes (2 per row, all 4 rows used);
;       edi -> top-left pixel of the cell
; Out:  esi advanced by 12, edi -> next cell; eax, ebx, ecx, edx clobbered.
; SELF-MODIFYING: byte +1 of every labelled "mov al/ah, bl" is rewritten
; from nfpk_mov4.
nf57:	; low 8x4x2 (12 bytes)
	xor eax, eax
	lea ecx, nfpk_mov4
	lea edx, byte ptr ds:nf57_11+1

	mov al, [esi+4]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf57_11-nf57_11)], bl
	mov [edx+(nf57_12-nf57_11)], bh
	shr ebx, 16
	mov [edx+(nf57_13-nf57_11)], bl
	mov [edx+(nf57_14-nf57_11)], bh

	mov al, [esi+5]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf57_15-nf57_11)], bl
	mov [edx+(nf57_16-nf57_11)], bh
	shr ebx, 16
	mov [edx+(nf57_17-nf57_11)], bl
	mov [edx+(nf57_18-nf57_11)], bh


	mov al, [esi+6]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf57_21-nf57_11)], bl
	mov [edx+(nf57_22-nf57_11)], bh
	shr ebx, 16
	mov [edx+(nf57_23-nf57_11)], bl
	mov [edx+(nf57_24-nf57_11)], bh

	mov al, [esi+7]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf57_25-nf57_11)], bl
	mov [edx+(nf57_26-nf57_11)], bh
	shr ebx, 16
	mov [edx+(nf57_27-nf57_11)], bl
	mov [edx+(nf57_28-nf57_11)], bh


	mov al, [esi+8]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf57_31-nf57_11)], bl
	mov [edx+(nf57_32-nf57_11)], bh
	shr ebx, 16
	mov [edx+(nf57_33-nf57_11)], bl
	mov [edx+(nf57_34-nf57_11)], bh

	mov al, [esi+9]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf57_35-nf57_11)], bl
	mov [edx+(nf57_36-nf57_11)], bh
	shr ebx, 16
	mov [edx+(nf57_37-nf57_11)], bl
	mov [edx+(nf57_38-nf57_11)], bh


	mov al, [esi+10]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf57_41-nf57_11)], bl
	mov [edx+(nf57_42-nf57_11)], bh
	shr ebx, 16
	mov [edx+(nf57_43-nf57_11)], bl
	mov [edx+(nf57_44-nf57_11)], bh

	mov al, [esi+11]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf57_45-nf57_11)], bl
	mov [edx+(nf57_46-nf57_11)], bh
	shr ebx, 16
	mov [edx+(nf57_47-nf57_11)], bl
	mov [edx+(nf57_48-nf57_11)], bh

	; Load bl,bh,cl,ch with four colors
	mov bx, [esi]
	mov cx, [esi+2]

	mov edx, nf_width	; edx = row stride while painting
	jmp nf57_0	; flush prefetch
	ALIGN 4
nf57_0:
nf57_11:mov al, bl
nf57_12:mov ah, bl
	shl eax, 16
nf57_13:mov al, bl
nf57_14:mov ah, bl
	mov [edi], eax

nf57_15:mov al, bl
nf57_16:mov ah, bl
	shl eax, 16
nf57_17:mov al, bl
nf57_18:mov ah, bl
	mov [edi+4], eax
	add edi, edx

nf57_21:mov al, bl
nf57_22:mov ah, bl
	shl eax, 16
nf57_23:mov al, bl
nf57_24:mov ah, bl
	mov [edi], eax

nf57_25:mov al, bl
nf57_26:mov ah, bl
	shl eax, 16
nf57_27:mov al, bl
nf57_28:mov ah, bl
	mov [edi+4], eax
	add edi, edx

nf57_31:mov al, bl
nf57_32:mov ah, bl
	shl eax, 16
nf57_33:mov al, bl
nf57_34:mov ah, bl
	mov [edi], eax

nf57_35:mov al, bl
nf57_36:mov ah, bl
	shl eax, 16
nf57_37:mov al, bl
nf57_38:mov ah, bl
	mov [edi+4], eax
	add edi, edx

nf57_41:mov al, bl
nf57_42:mov ah, bl
	shl eax, 16
nf57_43:mov al, bl
nf57_44:mov ah, bl
	mov [edi], eax

nf57_45:mov al, bl
nf57_46:mov ah, bl
	shl eax, 16
nf57_47:mov al, bl
nf57_48:mov ah, bl
	mov [edi+4], eax

	add esi, 12
	sub edi, nfpk_back_right	; 3*nf_width-SWIDTH: up 3 rows, right 8 pixels
	retn

;----------------------------------------
	ALIGN 4
; nf10: four quadrants, each with 4 colours and a 2-bit bitmap;
; half-height variant.
; In:   esi -> 4 groups of (c0..c3, 4 bitmap bytes, one per source row);
;       edi -> top-left pixel of the cell
;       If the first c0 > c1 (unsigned) the data is one of the 24-byte
;       forms -> nf26.
; Out:  esi advanced by 32, edi -> next cell; eax, ebx, ecx, edx clobbered.
; Paint order: 4 rows of the left 4-pixel column (groups 0 and 1), then
; 4 rows of the right column (groups 2 and 3).  Only bitmap bytes 0 and 2
; of each group are used.
; SELF-MODIFYING: byte +1 of every labelled "mov al/ah, bl" is rewritten
; from nfpk_mov4.
nf10:	; 2x2 4x4x2 (32 bytes)

	mov ax, [esi]
	cmp al, ah
	ja nf26

	xor eax, eax
	lea ecx, nfpk_mov4
	lea edx, byte ptr ds:nf10_11+1

	mov al, [esi+4]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf10_11-nf10_11)], bl
	mov [edx+(nf10_12-nf10_11)], bh
	shr ebx, 16
	mov [edx+(nf10_13-nf10_11)], bl
	mov [edx+(nf10_14-nf10_11)], bh

	mov al, [esi+6]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf10_21-nf10_11)], bl
	mov [edx+(nf10_22-nf10_11)], bh
	shr ebx, 16
	mov [edx+(nf10_23-nf10_11)], bl
	mov [edx+(nf10_24-nf10_11)], bh

	mov al, [esi+12]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf10_31-nf10_11)], bl
	mov [edx+(nf10_32-nf10_11)], bh
	shr ebx, 16
	mov [edx+(nf10_33-nf10_11)], bl
	mov [edx+(nf10_34-nf10_11)], bh

	mov al, [esi+14]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf10_41-nf10_11)], bl
	mov [edx+(nf10_42-nf10_11)], bh
	shr ebx, 16
	mov [edx+(nf10_43-nf10_11)], bl
	mov [edx+(nf10_44-nf10_11)], bh

	lea edx, [edx+(nf10_51-nf10_11)]	; rebase the patch pointer

	mov al, [esi+20]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf10_51-nf10_51)], bl
	mov [edx+(nf10_52-nf10_51)], bh
	shr ebx, 16
	mov [edx+(nf10_53-nf10_51)], bl
	mov [edx+(nf10_54-nf10_51)], bh

	mov al, [esi+22]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf10_61-nf10_51)], bl
	mov [edx+(nf10_62-nf10_51)], bh
	shr ebx, 16
	mov [edx+(nf10_63-nf10_51)], bl
	mov [edx+(nf10_64-nf10_51)], bh

	mov al, [esi+28]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf10_71-nf10_51)], bl
	mov [edx+(nf10_72-nf10_51)], bh
	shr ebx, 16
	mov [edx+(nf10_73-nf10_51)], bl
	mov [edx+(nf10_74-nf10_51)], bh

	mov al, [esi+30]
	mov ebx, [ecx+eax*4]
	mov [edx+(nf10_81-nf10_51)], bl
	mov [edx+(nf10_82-nf10_51)], bh
	shr ebx, 16
	mov [edx+(nf10_83-nf10_51)], bl
	mov [edx+(nf10_84-nf10_51)], bh

	; Load bl,bh,cl,ch with four colors
	mov bx, [esi]
	mov cx, [esi+2]

	mov edx, nf_width	; edx = row stride while painting
	jmp nf10_0	; flush prefetch
	ALIGN 4
nf10_0:
nf10_11:mov al, bl
nf10_12:mov ah, bl
	shl eax, 16
nf10_13:mov al, bl
nf10_14:mov ah, bl
	mov [edi], eax
	add edi, edx

nf10_21:mov al, bl
nf10_22:mov ah, bl
	shl eax, 16
nf10_23:mov al, bl
nf10_24:mov ah, bl
	mov [edi], eax
	add edi, edx

	; Load bl,bh,cl,ch with four colors
	mov bx, [esi+8]
	mov cx, [esi+10]

nf10_31:mov al, bl
nf10_32:mov ah, bl
	shl eax, 16
nf10_33:mov al, bl
nf10_34:mov ah, bl
	mov [edi], eax
	add edi, edx

nf10_41:mov al, bl
nf10_42:mov ah, bl
	shl eax, 16
nf10_43:mov al, bl
nf10_44:mov ah, bl
	mov [edi], eax
	add edi, edx

	lea eax, [edx*4-4]	; back up 4 rows and step 4 pixels right
	sub edi, eax

	; Load bl,bh,cl,ch with four colors
	mov bx, [esi+16]
	mov cx, [esi+18]

nf10_51:mov al, bl
nf10_52:mov ah, bl
	shl eax, 16
nf10_53:mov al, bl
nf10_54:mov ah, bl
	mov [edi], eax
	add edi, edx

nf10_61:mov al, bl
nf10_62:mov ah, bl
	shl eax, 16
nf10_63:mov al, bl
nf10_64:mov ah, bl
	mov [edi], eax
	add edi, edx

	; Load bl,bh,cl,ch with four colors
	mov bx, [esi+24]
	mov cx, [esi+26]

nf10_71:mov al, bl
nf10_72:mov ah, bl
	shl eax, 16
nf10_73:mov al, bl
nf10_74:mov ah, bl
	mov [edi], eax
	add edi, edx

nf10_81:mov al, bl
nf10_82:mov ah, bl
	shl eax, 16
nf10_83:mov al, bl
nf10_84:mov ah, bl
	mov [edi], eax

	add esi, 32
	sub edi, 4	; undo the 4-pixel step to the right column
	sub edi, nfpk_back_right	; 3*nf_width-SWIDTH: up 3 rows, right 8 pixels

	retn

;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+16
|
|
nf26: ; 2x1 4x8x2 (24 bytes)
|
|
|
|
mov ax, [esi+12]
|
|
cmp al, ah
|
|
ja nf42
|
|
|
|
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 24
|
|
jmp nf_solid
|
|
endif
|
|
|
|
xor eax, eax
|
|
lea ecx, nfpk_mov4
|
|
lea edx, byte ptr ds:nf26_11+1
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_11-nf26_11)], bl
|
|
mov [edx+(nf26_12-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_13-nf26_11)], bl
|
|
mov [edx+(nf26_14-nf26_11)], bh
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_21-nf26_11)], bl
|
|
mov [edx+(nf26_22-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_23-nf26_11)], bl
|
|
mov [edx+(nf26_24-nf26_11)], bh
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_31-nf26_11)], bl
|
|
mov [edx+(nf26_32-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_33-nf26_11)], bl
|
|
mov [edx+(nf26_34-nf26_11)], bh
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_41-nf26_11)], bl
|
|
mov [edx+(nf26_42-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_43-nf26_11)], bl
|
|
mov [edx+(nf26_44-nf26_11)], bh
|
|
|
|
lea edx, [edx+(nf26_51-nf26_11)]
|
|
|
|
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_51-nf26_51)], bl
|
|
mov [edx+(nf26_52-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_53-nf26_51)], bl
|
|
mov [edx+(nf26_54-nf26_51)], bh
|
|
|
|
mov al, [esi+18]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_61-nf26_51)], bl
|
|
mov [edx+(nf26_62-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_63-nf26_51)], bl
|
|
mov [edx+(nf26_64-nf26_51)], bh
|
|
|
|
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_71-nf26_51)], bl
|
|
mov [edx+(nf26_72-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_73-nf26_51)], bl
|
|
mov [edx+(nf26_74-nf26_51)], bh
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_81-nf26_51)], bl
|
|
mov [edx+(nf26_82-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_83-nf26_51)], bl
|
|
mov [edx+(nf26_84-nf26_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width
|
|
jmp nf26_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf26_0:
|
|
nf26_11:mov al, bl
|
|
nf26_12:mov ah, bl
|
|
shl eax, 16
|
|
nf26_13:mov al, bl
|
|
nf26_14:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_21:mov al, bl
|
|
nf26_22:mov ah, bl
|
|
shl eax, 16
|
|
nf26_23:mov al, bl
|
|
nf26_24:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_31:mov al, bl
|
|
nf26_32:mov ah, bl
|
|
shl eax, 16
|
|
nf26_33:mov al, bl
|
|
nf26_34:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_41:mov al, bl
|
|
nf26_42:mov ah, bl
|
|
shl eax, 16
|
|
nf26_43:mov al, bl
|
|
nf26_44:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
lea eax, [edx*4-4]
|
|
sub edi, eax
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+12]
|
|
mov cx, [esi+14]
|
|
|
|
nf26_51:mov al, bl
|
|
nf26_52:mov ah, bl
|
|
shl eax, 16
|
|
nf26_53:mov al, bl
|
|
nf26_54:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_61:mov al, bl
|
|
nf26_62:mov ah, bl
|
|
shl eax, 16
|
|
nf26_63:mov al, bl
|
|
nf26_64:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_71:mov al, bl
|
|
nf26_72:mov ah, bl
|
|
shl eax, 16
|
|
nf26_73:mov al, bl
|
|
nf26_74:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_81:mov al, bl
|
|
nf26_82:mov ah, bl
|
|
shl eax, 16
|
|
nf26_83:mov al, bl
|
|
nf26_84:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
add esi, 24
|
|
sub edi, 4
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+32
|
|
nf42: ; 1x2 8x4x2 (24 bytes)
|
|
|
|
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 24
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfpk_mov4
|
|
lea edx, byte ptr ds:nf42_11+1
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_11-nf42_11)], bl
|
|
mov [edx+(nf42_12-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_13-nf42_11)], bl
|
|
mov [edx+(nf42_14-nf42_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_15-nf42_11)], bl
|
|
mov [edx+(nf42_16-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_17-nf42_11)], bl
|
|
mov [edx+(nf42_18-nf42_11)], bh
|
|
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_31-nf42_11)], bl
|
|
mov [edx+(nf42_32-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_33-nf42_11)], bl
|
|
mov [edx+(nf42_34-nf42_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_35-nf42_11)], bl
|
|
mov [edx+(nf42_36-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_37-nf42_11)], bl
|
|
mov [edx+(nf42_38-nf42_11)], bh
|
|
|
|
lea edx, [edx+(nf42_51-nf42_11)]
|
|
|
|
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_51-nf42_51)], bl
|
|
mov [edx+(nf42_52-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_53-nf42_51)], bl
|
|
mov [edx+(nf42_54-nf42_51)], bh
|
|
|
|
mov al, [esi+17]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_55-nf42_51)], bl
|
|
mov [edx+(nf42_56-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_57-nf42_51)], bl
|
|
mov [edx+(nf42_58-nf42_51)], bh
|
|
|
|
|
|
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_71-nf42_51)], bl
|
|
mov [edx+(nf42_72-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_73-nf42_51)], bl
|
|
mov [edx+(nf42_74-nf42_51)], bh
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_75-nf42_51)], bl
|
|
mov [edx+(nf42_76-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_77-nf42_51)], bl
|
|
mov [edx+(nf42_78-nf42_51)], bh
|
|
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width
|
|
jmp nf42_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf42_0:
|
|
nf42_11:mov al, bl
|
|
nf42_12:mov ah, bl
|
|
shl eax, 16
|
|
nf42_13:mov al, bl
|
|
nf42_14:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_15:mov al, bl
|
|
nf42_16:mov ah, bl
|
|
shl eax, 16
|
|
nf42_17:mov al, bl
|
|
nf42_18:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_31:mov al, bl
|
|
nf42_32:mov ah, bl
|
|
shl eax, 16
|
|
nf42_33:mov al, bl
|
|
nf42_34:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_35:mov al, bl
|
|
nf42_36:mov ah, bl
|
|
shl eax, 16
|
|
nf42_37:mov al, bl
|
|
nf42_38:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+12]
|
|
mov cx, [esi+14]
|
|
|
|
nf42_51:mov al, bl
|
|
nf42_52:mov ah, bl
|
|
shl eax, 16
|
|
nf42_53:mov al, bl
|
|
nf42_54:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_55:mov al, bl
|
|
nf42_56:mov ah, bl
|
|
shl eax, 16
|
|
nf42_57:mov al, bl
|
|
nf42_58:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_71:mov al, bl
|
|
nf42_72:mov ah, bl
|
|
shl eax, 16
|
|
nf42_73:mov al, bl
|
|
nf42_74:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_75:mov al, bl
|
|
nf42_76:mov ah, bl
|
|
shl eax, 16
|
|
nf42_77:mov al, bl
|
|
nf42_78:mov ah, bl
|
|
mov [edi+4], eax
|
|
|
|
add esi, 24
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf11: ; 8x8x8 (64 bytes)
|
|
if 0 ;debug
|
|
add esi, 64
|
|
mov eax, 0fefefefeH
|
|
; mov ebx, eax
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
mov edx, nf_width
|
|
|
|
mov eax, [esi] ;0
|
|
mov [edi], eax
|
|
mov eax, [esi+4]
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
mov eax, [esi+16] ;2
|
|
mov [edi], eax
|
|
mov eax, [esi+20]
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
mov eax, [esi+32] ;4
|
|
mov [edi], eax
|
|
mov eax, [esi+36]
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
mov eax, [esi+48] ;6
|
|
mov [edi], eax
|
|
mov eax, [esi+52]
|
|
mov [edi+4], eax
|
|
|
|
add esi, 64
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf12: ; low 4x4x8 (16 bytes)
|
|
mov edx, nf_width
|
|
|
|
mov eax, [esi]
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi], ebx
|
|
shr eax, 16
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi+4], ebx
|
|
add edi, edx
|
|
|
|
mov eax, [esi+4]
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi], ebx
|
|
shr eax, 16
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi+4], ebx
|
|
add edi, edx
|
|
|
|
mov eax, [esi+8]
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi], ebx
|
|
shr eax, 16
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi+4], ebx
|
|
add edi, edx
|
|
|
|
mov eax, [esi+12]
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi], ebx
|
|
shr eax, 16
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi+4], ebx
|
|
|
|
sub edi, nfpk_back_right
|
|
add esi, 16
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf13: ; 2x2 4x4x0 (4 bytes)
|
|
mov edx, nf_width
|
|
|
|
mov cl, [esi]
|
|
mov ch, cl
|
|
mov eax, ecx
|
|
shl eax, 16
|
|
mov ax, cx
|
|
|
|
mov cl, [esi+1]
|
|
mov ch, cl
|
|
mov ebx, ecx
|
|
shl ebx, 16
|
|
mov bx, cx
|
|
|
|
mov [edi], eax
|
|
mov [edi+4], ebx
|
|
mov [edi+edx], eax
|
|
mov [edi+edx+4], ebx
|
|
lea edi, [edi+edx*2]
|
|
|
|
mov cl, [esi+2]
|
|
mov ch, cl
|
|
mov eax, ecx
|
|
shl eax, 16
|
|
mov ax, cx
|
|
|
|
mov cl, [esi+3]
|
|
mov ch, cl
|
|
mov ebx, ecx
|
|
shl ebx, 16
|
|
mov bx, cx
|
|
|
|
mov [edi], eax
|
|
mov [edi+4], ebx
|
|
add edi, edx
|
|
mov [edi], eax
|
|
mov [edi+4], ebx
|
|
|
|
sub edi, nfpk_back_right
|
|
add esi, 4
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf14: ; 8x8x0 (1 byte)
|
|
mov bl, [esi] ; Copy color into 8 positions
|
|
inc esi
|
|
mov bh, bl
|
|
mov eax, ebx
|
|
shl eax, 16
|
|
mov ax, bx
|
|
mov ebx, eax
|
|
if 0 ;debug
|
|
mov eax, 080808080h
|
|
mov ebx, eax
|
|
endif
|
|
jmp nf_solid
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf15: ; mix 8x8x0 (2 bytes)
|
|
mov bx, [esi] ; Copy 2 colors into 8 positions
|
|
add esi, 2 ; in a checkerboard
|
|
mov ax, bx
|
|
shl eax, 16
|
|
mov ax, bx
|
|
mov ebx, eax
|
|
rol ebx, 8
|
|
if 0 ;debug
|
|
mov eax, 080808080h
|
|
mov ebx, eax
|
|
endif
|
|
nf_solid:
|
|
mov edx, nf_width
|
|
|
|
mov [edi], eax
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
add edi, edx
|
|
mov [edi], eax
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
nfPkDecompH ENDP
|
|
|
|
|
|
;----------------------------------------------------------------------
; static void palLoadCompPalette(unsigned char *buf)
;
; Applies a compressed palette update from buf to pal_tbl.
;
; Stream layout, as consumed by the code below:
;   32 groups, each introduced by one mask byte that covers 8
;   consecutive palette entries (3 bytes per entry in pal_tbl).
;     mask bit n set   -> 3 replacement bytes for entry n of the group
;                         follow in the stream and are copied to pal_tbl
;     mask bit n clear -> entry n of the group is left untouched
;   A zero mask byte skips the whole group (24 bytes of pal_tbl).
;
; If at least 11 palette entries aren't changed, this is more compact
; than an uncompressed 256-entry palette (32 + 3*changed < 768 bytes).
;
; In:    buf = compressed palette stream
; Out:   nothing; pal_tbl is updated in place
; Regs:  ESI = read cursor in buf, EDI = write cursor in pal_tbl,
;        ECX = groups remaining, AL = mask byte of the current group.
;        ESI and EDI are saved/restored through USES.
; Note:  LODSB/MOVSW/MOVSB only step forward when the direction flag is
;        clear; this routine does not touch DF itself, so it presumably
;        relies on the caller (C convention) having it clear.
;----------------------------------------------------------------------
palLoadCompPalette PROC USES ESI EDI, \
		buf: PTRBYTE
	mov	ax, ds		; Ensure es==ds for Symantec flat mode
	mov	es, ax		; (MOVS stores through ES:EDI)

	; LOOP counts with the full ECX in a 32-bit code segment, so the
	; whole register must be initialised.  Loading only CX left the
	; upper 16 bits holding whatever the caller had in ECX.
	mov	ecx, 32
	mov	esi, buf
	mov	edi, offset pal_tbl

next:	lodsb			; al = mask for the next 8 entries
	or	al, al
	jnz	chk0
	add	edi, 24		; nothing changed in this group: skip 8*3 bytes
	loop	next
	jmp	done

	; "Copy" chain: falls through from one changed entry to the next
	; and drops into the "skip" chain below when a bit is clear.
chk0:	test	al, 1
	jz	not0
	movsw			; copy one 3-byte palette entry
	movsb
	test	al, 2
	jz	not1
cpy1:	movsw
	movsb
	test	al, 4
	jz	not2
cpy2:	movsw
	movsb
	test	al, 8
	jz	not3
cpy3:	movsw
	movsb
	test	al, 16
	jz	not4
cpy4:	movsw
	movsb
	test	al, 32
	jz	not5
cpy5:	movsw
	movsb
	test	al, 64
	jz	not6
cpy6:	movsw
	movsb
	or	al, al		; bit 7 is tested through the sign flag
	jns	not7
cpy7:	movsw
	movsb
	loop	next
	jmp	done

	; "Skip" chain: steps over an unchanged entry and jumps back into
	; the copy chain when the next bit is set.
not0:	add	edi, 3
	test	al, 2
	jnz	cpy1
not1:	add	edi, 3
	test	al, 4
	jnz	cpy2
not2:	add	edi, 3
	test	al, 8
	jnz	cpy3
not3:	add	edi, 3
	test	al, 16
	jnz	cpy4
not4:	add	edi, 3
	test	al, 32
	jnz	cpy5
not5:	add	edi, 3
	test	al, 64
	jnz	cpy6
not6:	add	edi, 3
	or	al, al
	js	cpy7
not7:	add	edi, 3
	loop	next

done:	ret

palLoadCompPalette ENDP
|
|
|
|
EXTERN snd_8to16: WORD ; short snd_8to16[256];
|
|
|
|
;----------------------------------------------------------------------
; unsigned sndDecompM16(unsigned short *dst, const unsigned char *src,
;                       unsigned len, unsigned prev);
;
; Decompresses a mono stream containing len samples
; (src is len bytes, dst is len*2 bytes).
; Each source byte indexes snd_8to16; the looked-up 16-bit value is
; added (wrapping at 16 bits) to the running sample, which is then
; stored to dst.
;
; prev is the previous decompression state or zero.
; Returns the new decompression state in EAX: the low word is the last
; sample written, the high word is carried over from prev unchanged.
; With len == 0 nothing is read or written and prev is returned as is.
;
; Regs:  ESI = src cursor, EDI = dst cursor, ECX = samples remaining,
;        EBX = current table index.  ESI/EDI/EBX are preserved via USES.
;----------------------------------------------------------------------
sndDecompM16 PROC USES ESI EDI EBX, \
		dst:PTRWORD, src:PTRBYTE, len:DWORD, prev:DWORD
	mov	ecx, len		; ecx = number of samples to decode
	mov	eax, prev		; ax  = running sample value
	test	ecx, ecx
	jz	snd_m16_exit		; empty stream: hand back prev

	mov	edi, dst
	mov	esi, src

snd_m16_sample:
	movzx	ebx, byte ptr [esi]	; ebx = zero-extended source byte
	inc	esi
	add	ax, word ptr snd_8to16[ebx*2]	; accumulate delta, 16-bit wrap
	mov	word ptr [edi], ax	; emit decoded sample
	lea	edi, [edi+2]
	dec	ecx
	jnz	snd_m16_sample

snd_m16_exit:
	ret
sndDecompM16 ENDP
|