mirror of
https://github.com/kevinbentley/Descent3.git
synced 2025-01-22 11:28:56 +00:00
15055 lines
260 KiB
NASM
15055 lines
260 KiB
NASM
; mvelibwa.c
|
|
;
|
|
; Interplay Movie (MVE) File Player Library (32-Bit Win95 Version)
|
|
; Assembly Language Components
|
|
; Written by Paul Allen Edelstein
|
|
;
|
|
; (c) 1997 Interplay Productions. All Rights Reserved.
|
|
; This file is confidential and consists of proprietary information
|
|
; of Interplay Productions. This file and associated libraries
|
|
; may not, in whole or in part, be disclosed to third parties,
|
|
; incorporated into any software product which is not being created
|
|
; for Interplay Productions, copied or duplicated in any form,
|
|
; without the prior written permission of Interplay Productions.
|
|
; Further, you may not reverse engineer, decompile or otherwise
|
|
; attempt to derive source code of this material.
|
|
;
|
|
|
|
; .386
|
|
.486 ; I only need .386, but I wanted the 486 cycle timings
|
|
ifdef SYMANTEC
|
|
.MODEL SMALL, C
|
|
DGROUP group _TEXT, _DATA
|
|
else
|
|
.MODEL FLAT, C
|
|
endif
|
|
|
|
;;--- Options ---
|
|
|
|
ONLYNEW equ 0 ; For debug, disables motion comp
|
|
LOGGING equ 0 ; Log timing statistics
|
|
PARTIAL equ 1 ; Support for partial updates
|
|
PKDATA equ 1 ; Support for packed data
|
|
HICOLOR equ 1 ; Support for HiColor
|
|
INTERP equ 0 ; Interpolated squares
|
|
; 0:none (4x4x8), 1:generic dither,
|
|
; 2:direction dither, 3:blend
|
|
COMPOPS equ 1 ; Compressed opcode table
|
|
SCALING equ 1 ; Scaling support
|
|
DECOMPD equ 0 ; Support for dithered half vert res
|
|
TRANS16 equ 1 ; Support for translating 16-bit rgb format
|
|
|
|
;;--- Types ---
|
|
|
|
PTRBYTE TYPEDEF PTR BYTE
|
|
PTRWORD TYPEDEF PTR WORD
|
|
PTRDWORD TYPEDEF PTR DWORD
|
|
PTRPROC TYPEDEF PTR PROC
|
|
|
|
;;--- Constants ---
|
|
|
|
; Width and height of sections in pixels.
|
|
SWIDTH equ 8
|
|
SHEIGHT equ 8
|
|
|
|
LOG2_SWIDTH equ 3
|
|
LOG2_SHEIGHT equ 3
|
|
|
|
;;---
|
|
|
|
EXTERN pal_tbl:BYTE ; unsigned char pal_tbl[3*256];
|
|
EXTERN pal15_tbl:WORD ; unsigned short pal15_tbl[256];
|
|
if INTERP eq 3
|
|
EXTERN blend_tbl: PTRDWORD ; unsigned *blend_tbl;
|
|
endif
|
|
.data
|
|
|
|
BYTE "(c) 1997 Interplay Productions. All Rights Reserved.\n"
|
|
BYTE "This file is confidential and consists of proprietary information\n"
|
|
BYTE "of Interplay Productions. This file and associated libraries\n"
|
|
BYTE "may not, in whole or in part, be disclosed to third parties,\n"
|
|
BYTE "incorporated into any software product which is not being created\n"
|
|
BYTE "for Interplay Productions, copied or duplicated in any form,\n"
|
|
BYTE "without the prior written permission of Interplay Productions.\n"
|
|
BYTE "Further, you may not reverse engineer, decompile or otherwise\n"
|
|
BYTE "attempt to derive source code of this material.\n",0
|
|
|
|
.code
|
|
|
|
PUBLIC mveliba_start, mveliba_end
|
|
|
|
mveliba_start:
|
|
|
|
;----------------------------------------------------------------------
|
|
; Logging Support
|
|
;-----------------
|
|
|
|
if LOGGING
|
|
|
|
;void logLabel(char *label)
|
|
;
|
|
logLabel PROTO lbl:PTRBYTE
|
|
|
|
LOG_LABEL MACRO msg
|
|
LOCAL lbl
|
|
.data
|
|
lbl BYTE msg,0
|
|
.code
|
|
INVOKE logLabel, offset lbl
|
|
ENDM
|
|
|
|
else
|
|
|
|
LOG_LABEL MACRO msg
|
|
ENDM
|
|
|
|
endif
|
|
|
|
;--------------------------------------------------------------------
|
|
; Sound Management
|
|
;--------------------
|
|
|
|
EXTERN snd_8to16: WORD ; short snd_8to16[256];
|
|
|
|
;unsigned sndDecompM16(unsigned short *dst, unsigned char *src,
;                      unsigned len, unsigned prev);
;
; Expands a delta-coded mono stream.  Each source byte indexes
; snd_8to16[]; the selected 16-bit entry is added (with 16-bit
; wraparound) to the running sample, which is then stored to dst.
;   dst  - receives len 16-bit samples (len*2 bytes)
;   src  - len code bytes
;   len  - sample count; with len == 0 nothing is read or written
;   prev - state returned by the previous call, or zero
; Returns the new state in eax: the low word is the running sample,
; the high word is passed through from prev unchanged.
;
sndDecompM16 PROC USES ESI EDI EBX, \
        dst:PTRWORD, src:PTRBYTE, len:DWORD, prev:DWORD

        mov     eax, prev               ; ax = running sample
        mov     ecx, len
        test    ecx, ecx
        jz      m16_exit                ; empty stream: return prev as is

        mov     edi, dst
        mov     esi, src

m16_next:
        movzx   ebx, byte ptr [esi]     ; ebx = next code byte
        inc     esi
        add     ax, snd_8to16[ebx*2]    ; accumulate the table delta
        mov     [edi], ax
        add     edi, 2
        dec     ecx
        jnz     m16_next

m16_exit:
        ret
sndDecompM16 ENDP
|
|
|
|
;unsigned sndDecompS16(unsigned short *dst, unsigned char *src,
;                      unsigned len, unsigned prev);
;
; Expands a delta-coded stereo stream.  Source bytes alternate between
; the two channels; each byte indexes snd_8to16[] and the entry is added
; (16-bit wraparound) to that channel's running sample, which is stored
; to dst in the same interleaved order.
;   dst  - receives len sample pairs (len*4 bytes)
;   src  - len code byte pairs (len*2 bytes)
;   len  - number of sample pairs; with len == 0 nothing is read or written
;   prev - state from the previous call, or zero: low word is the running
;          sample of the first channel, high word that of the second
; Returns the new state in eax, packed the same way as prev.
;
sndDecompS16 PROC USES ESI EDI EBX, \
        dst:PTRWORD, src:PTRBYTE, len:DWORD, prev:DWORD

        mov     ecx, len
        movzx   eax, word ptr prev      ; ax = first channel state
        movzx   edx, word ptr prev+2    ; dx = second channel state
        test    ecx, ecx
        jz      s16_pack

        mov     esi, src
        mov     edi, dst

s16_next:
        movzx   ebx, byte ptr [esi]     ; first channel code
        add     ax, snd_8to16[ebx*2]
        mov     [edi], ax

        movzx   ebx, byte ptr [esi+1]   ; second channel code
        add     esi, 2
        add     dx, snd_8to16[ebx*2]
        mov     [edi+2], dx
        add     edi, 4

        dec     ecx
        jnz     s16_next

s16_pack:
        shl     edx, 16                 ; second channel goes in the high word
        or      eax, edx
        ret
sndDecompS16 ENDP
|
|
|
|
;--------------------------------------------------------------------
|
|
; NextFrame (Video Decompression)
|
|
;----------------------------------
|
|
|
|
;; NextFrame working storage
|
|
; MemRec nf_mem_buf1;
|
|
; MemRec nf_mem_buf2;
|
|
EXTERN nf_buf_cur: PTRBYTE ; unsigned char* nf_buf_cur;
|
|
EXTERN nf_buf_prv: PTRBYTE ; unsigned char* nf_buf_prv;
|
|
|
|
;; NextFrame parameters
|
|
EXTERN nf_wqty: BYTE ;unsigned char nf_wqty; // (width/SWIDTH)
|
|
EXTERN nf_hqty: BYTE ;unsigned char nf_hqty; // (height/SHEIGHT)
|
|
EXTERN nf_fqty: BYTE ;unsigned char nf_fqty; // Number of fields
|
|
if HICOLOR
|
|
EXTERN nf_hicolor: DWORD ;unsigned nf_hicolor; // HiColor (0:none,1:normal,2:swapped)
|
|
endif
|
|
;; <derived quantities>
|
|
EXTERN nf_width: DWORD ;unsigned nf_width; // wqty * SWIDTH
|
|
EXTERN nf_height: DWORD ;unsigned nf_height; // hqty * SHEIGHT;
|
|
EXTERN nf_new_line: DWORD ;unsigned nf_new_line; // width - SWIDTH
|
|
EXTERN nf_new_row0: DWORD ;unsigned nf_new_row0; // SHEIGHT*width*2-width
|
|
EXTERN nf_back_right: DWORD ;unsigned nf_back_right; // (SHEIGHT-1)*width
|
|
|
|
;; Frame parameters
|
|
;; Portion of current frame which has been updated
|
|
;; and needs to be sent to screen.
|
|
;;
|
|
EXTERN nf_new_x: DWORD ;unsigned nf_new_x;
|
|
EXTERN nf_new_y: DWORD ;unsigned nf_new_y;
|
|
EXTERN nf_new_w: DWORD ;unsigned nf_new_w;
|
|
EXTERN nf_new_h: DWORD ;unsigned nf_new_h;
|
|
|
|
|
|
; NF_DECOMP_INIT HI_COLOR_FLAG
;
; Shared set-up for the frame decompressors.  Must be expanded inside a
; PROC that has parameters x, y, w, h and locals DiffBufPtrs, new_row
; and tbuf.
;   HI_COLOR_FLAG - 0 or 1; added to the horizontal shift count so that
;                   x and w are scaled by SWIDTH * 2^HI_COLOR_FLAG
; Results:
;   es           = ds
;   DiffBufPtrs  = nf_buf_prv - nf_buf_cur
;   nf_new_x/w   = x/w * SWIDTH * 2^HI_COLOR_FLAG
;   nf_new_y/h   = y/h * SHEIGHT * nf_fqty
;   new_row      = nf_new_row0 - nf_new_w
;   tbuf         = nf_buf_cur (+ nf_new_y*nf_width + nf_new_x when x or y
;                  is non-zero)
;   ebx          = nf_fqty, zero-extended (users of this macro rely on it)
; Clobbers eax, edx and flags.
NF_DECOMP_INIT MACRO HI_COLOR_FLAG:REQ
        LOCAL   nfi_add_origin, nfi_done

        push    ds                      ; es = ds for the string copies
        pop     es

        mov     eax, nf_buf_prv
        sub     eax, nf_buf_cur
        mov     DiffBufPtrs, eax

        movzx   ebx, nf_fqty            ; ebx = field count

        mov     eax, w
        shl     eax, LOG2_SWIDTH+HI_COLOR_FLAG
        mov     nf_new_w, eax

        mov     eax, x
        shl     eax, LOG2_SWIDTH+HI_COLOR_FLAG
        mov     nf_new_x, eax

        mov     eax, y
        shl     eax, LOG2_SHEIGHT
        mul     ebx                     ; * nf_fqty
        mov     nf_new_y, eax

        mov     eax, h
        shl     eax, LOG2_SHEIGHT
        mul     ebx                     ; * nf_fqty
        mov     nf_new_h, eax

        mov     eax, nf_new_row0
        sub     eax, nf_new_w
        mov     new_row, eax

        ; Start at the top of the current buffer, then move to the
        ; requested origin unless it is (0,0).
        mov     eax, nf_buf_cur
        mov     tbuf, eax
        cmp     x, 0
        jne     nfi_add_origin
        cmp     y, 0
        je      nfi_done
nfi_add_origin:
        mov     eax, nf_new_y
        mul     nf_width
        add     eax, nf_new_x
        add     tbuf, eax
nfi_done:

        ENDM    ; NF_DECOMP_INIT
|
|
|
|
|
|
; DECOMP_BODY HI_COLOR_FLAG
;
; Body of the full (non-partial) section decompressor.  Must be expanded
; inside a PROC with parameters comp, x, y, w, h and locals tbuf,
; new_row, DiffBufPtrs and parms_sz.
;
; comp starts with w*h*nf_fqty 16-bit section parameters, followed by
; the raw data of every section whose parameter is zero.
;   pass 1: sections with parameter == 0 are filled from the raw data.
;   pass 2 (omitted when ONLYNEW): a parameter > 0 (signed) copies the
;           section from the current buffer at byte offset
;           (parm - 04000h) * HI_COLOR_SCALE from the destination;
;           a parameter < 0 copies from the other buffer (via
;           DiffBufPtrs) at (parm - 0C000h) * HI_COLOR_SCALE, parm
;           taken as an unsigned 16-bit value.
; A section row is SWIDTH*HI_COLOR_SCALE bytes; the copies assume
; SWIDTH == 8 and SHEIGHT == 8.
; Only the low byte of w, h and nf_fqty is used for the loop counters.
DECOMP_BODY MACRO HI_COLOR_FLAG:REQ
        LOCAL   HI_COLOR_SCALE
HI_COLOR_SCALE equ HI_COLOR_FLAG+1

        NF_DECOMP_INIT HI_COLOR_FLAG    ; leaves ebx = nf_fqty

        mov     eax, w                  ; parms_sz = (w*h*nf_fqty) * 2
        mul     h
        mul     ebx
        add     eax, eax
        mov     parms_sz, eax

        ; Register roles for both passes:
        ;   ebx -> current section parameter
        ;   esi -> source pixels
        ;   edi -> destination section in the current buffer
        ;   edx  = nf_new_line, added after each copied row
        mov     ebx, comp
        mov     edi, tbuf
        mov     edx, nf_new_line
        mov     esi, parms_sz
        add     esi, ebx                ; raw data follows the parameters

        ;---- pass 1: fill sections that carry new data ----
        mov     cl, nf_fqty
fill_field:
        push    ecx                     ; field counter
        push    edi                     ; start of this field
        mov     ch, byte ptr h
fill_srow:
        mov     cl, byte ptr w
fill_test:
        cmp     word ptr [ebx], 0
        je      fill_copy
        add     edi, SWIDTH*HI_COLOR_SCALE      ; not new: step over it
fill_step:
        add     ebx, 2
        dec     cl
        jnz     fill_test
        add     edi, new_row
        dec     ch
        jnz     fill_srow
        pop     edi
        pop     ecx
        add     edi, nf_width           ; next field starts one line down
        dec     cl
        jnz     fill_field
        jmp     fill_done

        ; Copy one section from esi to edi (7 rows + row step, then the
        ; last row), then rewind edi to the top of the next section.
fill_copy:
        REPEAT 7
          REPEAT 2*HI_COLOR_SCALE
            movsd
          ENDM
          add   edi, edx
        ENDM
        REPEAT 2*HI_COLOR_SCALE
          movsd
        ENDM

        sub     edi, nf_back_right
        jmp     fill_step

fill_done:

ife ONLYNEW
        ;---- pass 2: motion-compensated sections ----
        ; Two copies of the scan loop exist; control moves to the second
        ; one after a previous-buffer copy and back to the first after a
        ; current-buffer copy.  esi is kept at zero between sections so
        ; that "or si,[ebx]" both loads and tests the parameter.
        sub     ebx, parms_sz           ; back to the first parameter
        mov     edi, tbuf
        mov     cl, nf_fqty
        xor     esi, esi

motA_field:
        push    ecx
        push    edi
        mov     ch, byte ptr h
motA_srow:
        mov     cl, byte ptr w
motA_test:
        or      si, [ebx]
        jg      mot_from_cur
        jl      motA_to_prev
        add     edi, SWIDTH*HI_COLOR_SCALE
motA_step:
        add     ebx, 2
        dec     cl
        jnz     motA_test
        add     edi, new_row
        dec     ch
        jnz     motA_srow
        pop     edi
        pop     ecx
        add     edi, nf_width
        dec     cl
        jnz     motA_field
        jmp     mot_done

motA_to_prev:
        jmp     mot_from_prev

        ; Section comes from the current buffer.
mot_from_cur:
        lea     esi, [esi*HI_COLOR_SCALE-04000h*HI_COLOR_SCALE+edi]
        REPEAT 7
          REPEAT 2*HI_COLOR_SCALE
            movsd
          ENDM
          add   esi, edx
          add   edi, edx
        ENDM
        REPEAT 2*HI_COLOR_SCALE
          movsd
        ENDM

        sub     edi, nf_back_right
        xor     esi, esi                ; ready for the next parameter
        jmp     motA_step

motB_field:
        push    ecx
        push    edi
        mov     ch, byte ptr h
motB_srow:
        mov     cl, byte ptr w
motB_test:
        or      si, [ebx]
        jl      mot_from_prev
        jg      motB_to_cur
        add     edi, SWIDTH*HI_COLOR_SCALE
motB_step:
        add     ebx, 2
        dec     cl
        jnz     motB_test
        add     edi, new_row
        dec     ch
        jnz     motB_srow
        pop     edi
        pop     ecx
        add     edi, nf_width
        dec     cl
        jnz     motB_field
        jmp     mot_done

motB_to_cur:
        jmp     mot_from_cur

        ; Section comes from the other (previous) buffer.
mot_from_prev:
        lea     esi, [esi*HI_COLOR_SCALE-0C000h*HI_COLOR_SCALE+edi]
        add     esi, DiffBufPtrs
        REPEAT 7
          REPEAT 2*HI_COLOR_SCALE
            movsd
          ENDM
          add   esi, edx
          add   edi, edx
        ENDM
        REPEAT 2*HI_COLOR_SCALE
          movsd
        ENDM

        sub     edi, nf_back_right
        xor     esi, esi                ; ready for the next parameter
        jmp     motB_step

mot_done:
endif   ; !ONLYNEW
        ENDM    ; DECOMP_BODY
|
|
|
|
if PARTIAL
|
|
|
|
; DECOMP_CHG_BODY HI_COLOR_FLAG
;
; Body of the partial-update section decompressor.  Must be expanded
; inside a PROC with parameters chgs, parms, comp, x, y, w, h and
; locals tbuf, new_row, DiffBufPtrs and pChgs.
;
; chgs is a stream of 16-bit flag words consumed from the top bit down:
; "add ax,ax" shifts the next bit into carry.  If ax becomes zero the
; next word is fetched and tested again; carry clear with bits left
; means the section is skipped without consuming a parameter; carry set
; with bits left means the section has a parameter in parms.
; For sections that have a parameter:
;   pass 1: parameter == 0 fills the section from the raw data in comp.
;   pass 2 (omitted when ONLYNEW): parameter > 0 (signed) copies from
;           the current buffer at (parm - 04000h) * HI_COLOR_SCALE bytes
;           from the destination; parameter < 0 copies from the other
;           buffer (via DiffBufPtrs) at (parm - 0C000h) * HI_COLOR_SCALE.
; The copies assume SWIDTH == 8 and SHEIGHT == 8.
DECOMP_CHG_BODY MACRO HI_COLOR_FLAG:REQ
        LOCAL   HI_COLOR_SCALE
HI_COLOR_SCALE equ HI_COLOR_FLAG+1

        NF_DECOMP_INIT HI_COLOR_FLAG

        ; Register roles for both passes:
        ;   ax   = remaining change flags of the current word
        ;   ebx -> next unconsumed section parameter
        ;   esi -> source pixels
        ;   edi -> destination section in the current buffer
        ;   edx  = nf_new_line, added after each copied row
        mov     esi, comp
        mov     ebx, parms
        mov     edi, tbuf
        mov     edx, nf_new_line

        ;---- pass 1: fill sections that carry new data ----
        mov     eax, chgs
        mov     pChgs, eax
        xor     eax, eax                ; no flags yet: first test fetches a word
        mov     cl, nf_fqty
chf_field:
        push    ecx
        push    edi
        mov     ch, byte ptr h
chf_srow:
        mov     cl, byte ptr w
chf_test:
        add     ax, ax
        ja      chf_skip                ; flag clear: nothing for this section
        jz      chf_fetch               ; word used up: get the next one
        cmp     word ptr [ebx], 0
        je      chf_copy
        add     ebx, 2
chf_skip:
        add     edi, SWIDTH*HI_COLOR_SCALE
chf_step:
        dec     cl
        jnz     chf_test
        add     edi, new_row
        dec     ch
        jnz     chf_srow
        pop     edi
        pop     ecx
        add     edi, nf_width
        dec     cl
        jnz     chf_field
        jmp     chf_done

chf_fetch:
        mov     eax, pChgs
        add     pChgs, 2
        mov     ax, [eax]
        jmp     chf_test

        ; Copy one section from esi to edi, then rewind edi to the top
        ; of the next section and consume the parameter.
chf_copy:
        REPEAT 7
          REPEAT 2*HI_COLOR_SCALE
            movsd
          ENDM
          add   edi, edx
        ENDM
        REPEAT 2*HI_COLOR_SCALE
          movsd
        ENDM

        sub     edi, nf_back_right
        add     ebx, 2
        jmp     chf_step

chf_done:

ife ONLYNEW
        ;---- pass 2: motion-compensated sections ----
        ; Two copies of the scan loop exist; control moves to the second
        ; one after a previous-buffer copy and back to the first after a
        ; current-buffer copy.  esi is kept at zero between sections so
        ; that "or si,[ebx]" both loads and tests the parameter.
        mov     edi, tbuf
        mov     ebx, parms

        mov     eax, chgs
        mov     pChgs, eax
        xor     eax, eax
        mov     cl, byte ptr nf_fqty
        xor     esi, esi
chmA_field:
        push    ecx
        push    edi
        mov     ch, byte ptr h
chmA_srow:
        mov     cl, byte ptr w
chmA_test:
        add     ax, ax
        ja      chmA_skip
        jz      chmA_fetch
        or      si, [ebx]
        jg      chm_from_cur
        jl      chmA_to_prev
        add     ebx, 2
chmA_skip:
        add     edi, SWIDTH*HI_COLOR_SCALE
chmA_step:
        dec     cl
        jnz     chmA_test
        add     edi, new_row
        dec     ch
        jnz     chmA_srow
        pop     edi
        pop     ecx
        add     edi, nf_width
        dec     cl
        jnz     chmA_field
        jmp     chm_done

chmA_fetch:
        mov     eax, pChgs
        add     pChgs, 2
        mov     ax, word ptr [eax]
        jmp     chmA_test

chmA_to_prev:
        jmp     chm_from_prev

        ; Section comes from the current buffer.
chm_from_cur:
        lea     esi, [esi*HI_COLOR_SCALE-04000h*HI_COLOR_SCALE+edi]
        REPEAT 7
          REPEAT 2*HI_COLOR_SCALE
            movsd
          ENDM
          add   esi, edx
          add   edi, edx
        ENDM
        REPEAT 2*HI_COLOR_SCALE
          movsd
        ENDM

        sub     edi, nf_back_right
        xor     esi, esi                ; ready for the next parameter
        add     ebx, 2
        jmp     chmA_step

chmB_field:
        push    ecx
        push    edi
        mov     ch, byte ptr h
chmB_srow:
        mov     cl, byte ptr w
chmB_test:
        add     ax, ax
        ja      chmB_skip
        jz      chmB_fetch
        or      si, [ebx]
        jl      chm_from_prev
        jg      chmB_to_cur
        add     ebx, 2
chmB_skip:
        add     edi, SWIDTH*HI_COLOR_SCALE
chmB_step:
        dec     cl
        jnz     chmB_test
        add     edi, new_row
        dec     ch
        jnz     chmB_srow
        pop     edi
        pop     ecx
        add     edi, nf_width
        dec     cl
        jnz     chmB_field
        jmp     chm_done

chmB_fetch:
        mov     eax, pChgs
        add     pChgs, 2
        mov     ax, [eax]
        jmp     chmB_test

chmB_to_cur:
        jmp     chm_from_cur

        ; Section comes from the other (previous) buffer.
chm_from_prev:
        lea     esi, [esi*HI_COLOR_SCALE-0C000h*HI_COLOR_SCALE+edi]
        add     esi, DiffBufPtrs
        REPEAT 7
          REPEAT 2*HI_COLOR_SCALE
            movsd
          ENDM
          add   esi, edx
          add   edi, edx
        ENDM
        REPEAT 2*HI_COLOR_SCALE
          movsd
        ENDM

        sub     edi, nf_back_right
        add     ebx, 2
        xor     esi, esi                ; ready for the next parameter
        jmp     chmB_step

chm_done:
endif   ; !ONLYNEW

        ENDM    ; DECOMP_CHG_BODY
|
|
|
|
endif ; PARTIAL
|
|
|
|
;;--- HiColor versions
|
|
|
|
if HICOLOR
|
|
|
|
;void nfHiColorDecomp(unsigned char *comp,
;                     unsigned x, unsigned y, unsigned w, unsigned h)
;
; HiColor decompressor: expands DECOMP_BODY with HI_COLOR_FLAG = 1, so
; every section row is SWIDTH*2 bytes wide.
;   comp    - section parameters followed by new section data
;   x,y,w,h - target rectangle inside the current buffer, in units of
;             SWIDTH x SHEIGHT (8x8) sections
; The four locals are the working storage the macro expects.
;
nfHiColorDecomp PROC USES ESI EDI EBX, \
        comp:PTRBYTE, x:DWORD, y:DWORD, w:DWORD, h:DWORD
        LOCAL   tbuf:PTRBYTE, new_row:DWORD, DiffBufPtrs:DWORD, parms_sz:DWORD

        LOG_LABEL "StartHiColorDecomp"
        DECOMP_BODY 1                   ; HiColor
        LOG_LABEL "EndHiColorDecomp"

        ret
nfHiColorDecomp ENDP
|
|
|
|
if PARTIAL
|
|
|
|
;void nfHiColorDecompChg(unsigned short *chgs,
;                        unsigned short *parms,
;                        unsigned char *comp,
;                        unsigned x, unsigned y, unsigned w, unsigned h)
;
; HiColor partial-update decompressor: expands DECOMP_CHG_BODY with
; HI_COLOR_FLAG = 1.
;   chgs    - change-flag words selecting which sections are updated
;   parms   - motion parameters, one per updated section
;   comp    - new data for updated sections whose parameter is zero
;   x,y,w,h - target rectangle inside the current buffer, in units of
;             SWIDTH x SHEIGHT (8x8) sections
; The four locals are the working storage the macro expects.
;
nfHiColorDecompChg PROC USES ESI EDI EBX, \
        chgs:PTRWORD, parms:PTRWORD, comp:PTRBYTE, \
        x:DWORD, y:DWORD, w:DWORD, h:DWORD
        LOCAL   tbuf:PTRBYTE, new_row:DWORD, DiffBufPtrs:DWORD, pChgs:PTRBYTE

        LOG_LABEL "StartHiColorDecompChg"
        DECOMP_CHG_BODY 1               ; HiColor
        LOG_LABEL "EndHiColorDecompChg"
        ret
nfHiColorDecompChg ENDP
|
|
|
|
|
|
endif ; PARTIAL
|
|
|
|
|
|
endif ; HICOLOR
|
|
|
|
; Non-HiColor versions
|
|
|
|
;void nfDecomp(unsigned char *comp,
;              unsigned x, unsigned y, unsigned w, unsigned h)
;
; Decompresses into the rectangle x,y,w,h of the current buffer, given
; in units of SWIDTH x SHEIGHT (8x8) sections.
; When built with HICOLOR and nf_hicolor is non-zero the call is handed
; to nfHiColorDecomp with the same arguments; otherwise DECOMP_BODY is
; expanded with HI_COLOR_FLAG = 0.
;
nfDecomp PROC USES ESI EDI EBX, \
        comp:PTRBYTE, x:DWORD, y:DWORD, w:DWORD, h:DWORD
        LOCAL   tbuf:PTRBYTE, new_row:DWORD, DiffBufPtrs:DWORD, parms_sz:DWORD

if HICOLOR
        cmp     nf_hicolor, 0
        je      nfd_normal
        INVOKE  nfHiColorDecomp, comp, x, y, w, h
        ret
nfd_normal:
endif

        LOG_LABEL "StartDecomp"
        DECOMP_BODY 0                   ; Not HiColor
        LOG_LABEL "EndDecomp"

        ret
nfDecomp ENDP
|
|
|
|
if PARTIAL
|
|
|
|
;void nfDecompChg(unsigned short *chgs,
;                 unsigned short *parms,
;                 unsigned char *comp,
;                 unsigned x, unsigned y, unsigned w, unsigned h)
;
; Partial-update decompressor for the rectangle x,y,w,h of the current
; buffer, given in units of SWIDTH x SHEIGHT (8x8) sections.
;   chgs  - change-flag words selecting which sections are updated
;   parms - motion parameters, one per updated section
;   comp  - new data for updated sections whose parameter is zero
; When built with HICOLOR and nf_hicolor is non-zero the call is handed
; to nfHiColorDecompChg with the same arguments; otherwise
; DECOMP_CHG_BODY is expanded with HI_COLOR_FLAG = 0.
;
nfDecompChg PROC USES ESI EDI EBX, \
        chgs:PTRWORD, parms:PTRWORD, comp:PTRBYTE, \
        x:DWORD, y:DWORD, w:DWORD, h:DWORD
        LOCAL   tbuf:PTRBYTE, new_row:DWORD, DiffBufPtrs:DWORD, pChgs:PTRBYTE

if HICOLOR
        cmp     nf_hicolor, 0
        je      nfdc_normal
        INVOKE  nfHiColorDecompChg, chgs, parms, comp, x, y, w, h
        ret
nfdc_normal:
endif

        LOG_LABEL "StartDecompChg"
        DECOMP_CHG_BODY 0               ; Not HiColor
        LOG_LABEL "EndDecompChg"

        ret
nfDecompChg ENDP
|
|
|
|
|
|
endif ; PARTIAL
|
|
|
|
;----------------------------------------------------------------------
|
|
|
|
if PKDATA
|
|
.data
|
|
|
|
if (INTERP eq 1) or (INTERP eq 2) ; *** Old version for dithering ***
|
|
; luminace table for palette entries
|
|
lum_tbl DWORD 256 DUP (0)
|
|
endif
|
|
|
|
; signed 8-bit y * nf_width
|
|
nfpk_ShiftY DWORD 256 DUP (0)
|
|
|
|
; Constant tables
|
|
|
|
; 8-bit -8:7 x nf_width + -8:7
|
|
nfpk_ShiftP1 LABEL WORD
|
|
FOR y, <-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7>
|
|
FOR x, <-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7>
|
|
BYTE x,y
|
|
ENDM
|
|
ENDM
|
|
|
|
; 8-bit to right and below in roughly 0:14*nf_width + -14:14 (-3 cases)
|
|
; negative is
|
|
; 8-bit to left and above in roughly -14:0*nf_width + -14:14 (-3 cases)
|
|
nfpk_ShiftP2 LABEL WORD
|
|
FOR y, <0,1,2,3,4,5,6,7>
|
|
FOR x, <8,9,10,11,12,13,14>
|
|
BYTE x,y
|
|
ENDM
|
|
ENDM
|
|
FOR y, <8,9,10,11,12,13>
|
|
FOR x, <-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1>
|
|
BYTE x,y
|
|
ENDM
|
|
FOR x, <0,1,2,3,4,5,6,7,8,9,10,11,12,13,14>
|
|
BYTE x,y
|
|
ENDM
|
|
ENDM
|
|
FOR x, <-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1>
|
|
BYTE x,14
|
|
ENDM
|
|
FOR x, <0,1,2,3,4,5,6,7,8,9,10,11>
|
|
BYTE x,14
|
|
ENDM
|
|
|
|
nfpk_mov4l LABEL DWORD
|
|
; mov ax, bx,cx
|
|
MOV4L_REGS TEXTEQU <!<0c0h+3,0c0h+1!>>
|
|
%FOR m4, MOV4L_REGS
|
|
% FOR m3, MOV4L_REGS
|
|
% FOR m2, MOV4L_REGS
|
|
% FOR m1, MOV4L_REGS
|
|
BYTE m2,m1,m4,m3
|
|
ENDM
|
|
ENDM
|
|
ENDM
|
|
ENDM
|
|
|
|
nfpk_mov8 LABEL DWORD
|
|
; mov ax, bx/dx/cx/bp
|
|
MOV8_REGS TEXTEQU <!<0c0h+3,0c0h+2,0c0h+1,0c0h+5!>>
|
|
%FOR m4, MOV8_REGS
|
|
% FOR m3, MOV8_REGS
|
|
% FOR m2, MOV8_REGS
|
|
% FOR m1, MOV8_REGS
|
|
BYTE m2,m1,m4,m3
|
|
ENDM
|
|
ENDM
|
|
ENDM
|
|
ENDM
|
|
|
|
nfpk_mov4 LABEL DWORD
|
|
; mov al, bl/bh/cl/ch
|
|
MOV4_REGS0 TEXTEQU <!<0c0h+3,0c0h+7,0c0h+1,0c0h+5!>>
|
|
; mov ah, bl/bh/cl/ch
|
|
MOV4_REGS1 TEXTEQU <!<0e0h+3,0e0h+7,0e0h+1,0e0h+5!>>
|
|
%FOR m4, MOV4_REGS1
|
|
% FOR m3, MOV4_REGS0
|
|
% FOR m2, MOV4_REGS1
|
|
% FOR m1, MOV4_REGS0
|
|
BYTE m3,m4,m1,m2
|
|
ENDM
|
|
ENDM
|
|
ENDM
|
|
ENDM
|
|
|
|
.code
|
|
|
|
; nfPkConfig
;
; Rebuilds nfpk_ShiftY from the current nf_width:
;   entries   0..127 = i * nf_width
;   entries 128..255 = (i - 256) * nf_width
; i.e. the table is indexed by a signed 8-bit line offset.
; Takes no arguments; eax is zero on return.
nfPkConfig PROC USES ESI EDI EBX

        mov     ebx, nf_width
        xor     ecx, ecx                ; ecx = table index

        xor     eax, eax                ; non-negative half starts at 0
pkc_pos:
        mov     nfpk_ShiftY[ecx*4], eax
        add     eax, ebx
        inc     ecx
        cmp     ecx, 128
        jb      pkc_pos

        mov     eax, ebx                ; negative half starts at -128*width
        shl     eax, 7
        neg     eax
pkc_neg:
        mov     nfpk_ShiftY[ecx*4], eax
        add     eax, ebx
        inc     ecx
        cmp     ecx, 256
        jb      pkc_neg

        ret
nfPkConfig ENDP
|
|
|
|
if (INTERP eq 1) or (INTERP eq 2)
|
|
; nfPkPal
;
; Rebuilds lum_tbl from pal_tbl.  For each of the 256 three-byte palette
; entries (r, g, b) it stores
;       r*2990 + g*5866 + b*1144
; as a dword.  Takes no arguments.
nfPkPal PROC USES ESI EDI EBX

        lea     esi, pal_tbl
        lea     edi, lum_tbl
        mov     ecx, 256
pal_next:
        movzx   eax, byte ptr [esi]     ; r
        movzx   ebx, byte ptr [esi+1]   ; g
        movzx   edx, byte ptr [esi+2]   ; b
        imul    eax, eax, 2990
        imul    ebx, ebx, 5866
        imul    edx, edx, 1144
        add     eax, ebx
        add     eax, edx
        mov     [edi], eax
        add     esi, 3
        add     edi, 4
        dec     ecx
        jnz     pal_next

        ret
nfPkPal ENDP
|
|
|
|
elseif INTERP eq 3
|
|
|
|
; nfPkInterp1 left, right
;
; Writes one 8-byte row at [edi].  left and right are byte registers;
; esi must point at a table of dwords indexed by (right*256 + left).
; With t0..t3 the bytes of the selected dword (low to high), the row
; written is:  left, t1, t1, t2, t2, t3, t0, right.
; Clobbers eax, edx and flags.
nfPkInterp1 MACRO left:REQ, right:REQ
        movzx   eax, left
        mov     ah, right               ; eax = table index
        mov     eax, [esi+eax*4]

        mov     edx, eax                ; first four bytes of the row
        mov     dl, dh
        shl     edx, 8
        mov     dl, left
        mov     [edi], edx

        mov     ah, right               ; last four bytes of the row
        ror     eax, 16
        mov     [edi+4], eax
        ENDM
|
|
|
|
; nfPkInterp2 left, right
;
; Same row as nfPkInterp1, written twice: at [edi] and at [edi+ebp].
; left and right are byte registers; esi must point at a table of
; dwords indexed by (right*256 + left); ebp is the distance in bytes
; between the two rows.
; With t0..t3 the bytes of the selected dword (low to high), each row
; is:  left, t1, t1, t2, t2, t3, t0, right.
; Clobbers eax, edx and flags.
nfPkInterp2 MACRO left:REQ, right:REQ
        movzx   eax, left
        mov     ah, right               ; eax = table index
        mov     eax, [esi+eax*4]

        mov     edx, eax                ; first four bytes of both rows
        mov     dl, dh
        shl     edx, 8
        mov     dl, left
        mov     [edi], edx
        mov     [edi+ebp*1], edx

        mov     ah, right               ; last four bytes of both rows
        ror     eax, 16
        mov     [edi+4], eax
        mov     [edi+4+ebp*1], eax
        ENDM
|
|
|
|
endif
|
|
|
|
ifdef SYMANTEC
|
|
EXTERN _data_bottom:PTRBYTE
|
|
endif
|
|
|
|
; Normal version
|
|
;
|
|
nfPkDecomp PROC USES ESI EDI EBX, \
|
|
ops:PTRBYTE, comp:PTRBYTE, \
|
|
x:DWORD, y:DWORD, w:DWORD, h:DWORD
|
|
LOCAL tbuf: PTRBYTE
|
|
LOCAL new_row:DWORD
|
|
LOCAL DiffBufPtrs:DWORD
|
|
|
|
LOCAL nfpk_back_right: DWORD
|
|
LOCAL wcnt:DWORD
|
|
|
|
LOG_LABEL "StartPkDecomp"
|
|
|
|
.data
|
|
nfpk_OpTbl label dword
|
|
dword offset nf0 ; Prev Same (0)
|
|
dword offset nf1 ; No change (and copied to screen) (0)
|
|
dword offset nf2 ; Near shift from older part of current buf (1)
|
|
dword offset nf3 ; Near shift from newer part of current buf (1)
|
|
dword offset nf4 ; Near shift from previous buffer (1)
|
|
dword offset nf5 ; Far shift from previous buffer (2)
|
|
dword offset nf6 ; Far shift from current buffer (2)
|
|
; [Or if COMPOPS, run of no changes (0)]
|
|
dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
|
|
dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
|
|
dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
|
|
; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
|
|
dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 4x8x2 (24 bytes)
|
|
dword offset nf11 ; 8x8x8 (64 bytes)
|
|
dword offset nf12 ; low 4x4x8 (16 bytes)
|
|
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
|
|
dword offset nf14 ; 8x8x0 (1 byte)
|
|
dword offset nf15 ; mix 8x8x0 (2 bytes)
|
|
.code
|
|
|
|
ifdef SYMANTEC
|
|
mov ebx, ds ; Allow DS to access code
|
|
mov ecx, 0
|
|
mov ax, 3505h
|
|
int 21h
|
|
endif
|
|
|
|
NF_DECOMP_INIT 0
|
|
|
|
mov eax, nf_back_right
|
|
sub eax, SWIDTH
|
|
mov nfpk_back_right, eax
|
|
|
|
mov esi, comp
|
|
mov edi, tbuf
|
|
nf_StartRow:
|
|
mov eax, w
|
|
shr eax, 1
|
|
mov wcnt,eax
|
|
ALIGN 4
|
|
nf_NextPair:
|
|
dec wcnt
|
|
js nf_NextRow
|
|
mov ebx, ops
|
|
mov al, [ebx]
|
|
inc ebx
|
|
mov ops, ebx
|
|
|
|
xor ebx, ebx
|
|
mov bl, al
|
|
shr bl, 4
|
|
and eax, 0Fh
|
|
push offset nf_NextPair
|
|
push nfpk_OpTbl[ebx*4]
|
|
jmp nfpk_OpTbl[eax*4]
|
|
|
|
nf_NextRow:
|
|
add edi, new_row
|
|
dec h
|
|
jnz nf_StartRow
|
|
LOG_LABEL "EndPkDecomp"
|
|
|
|
ifdef SYMANTEC
|
|
mov ebx, ds ; Disable DS from accessing code
|
|
mov ecx, offset DGROUP:_data_bottom[-1]
|
|
mov ax, 3505h
|
|
int 21h
|
|
endif
|
|
ret
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
if INTERP eq 0
|
|
|
|
nf0: ; No change from previous buffer
|
|
mov eax, DiffBufPtrs
|
|
jmp nf_shift
|
|
|
|
elseif INTERP eq 3
|
|
nf0: ; Interpolated (1 byte)
|
|
push ebp
|
|
|
|
mov ebp, nf_width
|
|
sub edi, ebp ; Get four corner colors
|
|
mov bl, [edi-1] ; into bl,bh,cl,ch
|
|
mov bh, [edi+7]
|
|
mov cl, [edi+ebp*8-1]
|
|
mov ch, [esi]
|
|
inc esi
|
|
add edi, ebp
|
|
|
|
push esi
|
|
mov esi, blend_tbl
|
|
|
|
nfPkInterp1 bl,bh
|
|
add edi, ebp
|
|
push ebx
|
|
push ecx
|
|
xor eax, eax
|
|
mov al, bl
|
|
mov ah, cl
|
|
mov edx, [esi+eax*4]
|
|
mov al, bh
|
|
mov ah, ch
|
|
mov ecx, [esi+eax*4]
|
|
mov ebx, edx
|
|
nfPkInterp2 bh,ch
|
|
lea edi, [edi+ebp*2]
|
|
ror ebx, 16
|
|
ror ecx, 16
|
|
nfPkInterp2 bl,cl
|
|
lea edi, [edi+ebp*2]
|
|
nfPkInterp2 bh,ch
|
|
lea edi, [edi+ebp*2]
|
|
pop ecx
|
|
pop ebx
|
|
nfPkInterp1 cl,ch
|
|
|
|
pop esi
|
|
pop ebp
|
|
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
elseif INTERP eq 2
|
|
|
|
nf0: ; Interpolated (1 byte)
|
|
mov edx, nf_width
|
|
sub edi, edx ; Get four corner colors
|
|
sub edi, edx ;xxx
|
|
mov bl, [edi-1] ; into bl,bh,cl,ch
|
|
mov bh, [edi+7]
|
|
mov cl, [edi+edx*8-1]
|
|
mov ch, [esi]
|
|
inc esi
|
|
add edi, edx ;xxx
|
|
add edi, edx
|
|
|
|
; Get four luminances into eax, ebx, ebp, ecx
|
|
; Use edx for temp, esi for closest luminance, edi for closest pair
|
|
push ebx
|
|
push ecx
|
|
push esi
|
|
push edi
|
|
push ebp
|
|
|
|
xor edx, edx
|
|
mov dl, bl
|
|
mov eax, lum_tbl[edx*4]
|
|
mov dl, bh
|
|
mov ebx, lum_tbl[edx*4]
|
|
mov dl, cl
|
|
mov ebp, lum_tbl[edx*4]
|
|
mov dl, ch
|
|
mov ecx, lum_tbl[edx*4]
|
|
|
|
mov edx, eax
|
|
sub edx, ebx
|
|
jns nf0a
|
|
neg edx
|
|
nf0a: mov esi, edx
|
|
mov edi, 0 ; Vert
|
|
|
|
mov edx, eax
|
|
sub edx, ebp
|
|
jns nf0b
|
|
neg edx
|
|
nf0b: cmp edx, esi
|
|
ja nf0c
|
|
mov esi, edx
|
|
mov edi, 1 ; Horiz
|
|
|
|
nf0c: mov edx, eax
|
|
sub edx, ecx
|
|
jns nf0d
|
|
neg edx
|
|
nf0d: cmp edx, esi
|
|
ja nf0e
|
|
mov esi, edx
|
|
mov edi, 2 ; \ Diag
|
|
|
|
nf0e: mov edx, ebx
|
|
sub edx, ebp
|
|
jns nf0f
|
|
neg edx
|
|
nf0f: cmp edx, esi
|
|
ja nf0g
|
|
mov esi, edx ; / RDiag
|
|
mov edi, 3
|
|
|
|
nf0g:
|
|
mov edx, ebx
|
|
sub edx, ecx
|
|
jns nf0h
|
|
neg edx
|
|
nf0h: cmp edx, esi
|
|
ja nf0i
|
|
mov esi, edx
|
|
mov edi, 1 ; Horiz
|
|
|
|
nf0i: mov edx, ebp
|
|
sub edx, ecx
|
|
jns nf0j
|
|
neg edx
|
|
nf0j: cmp edx, esi
|
|
ja nf0k
|
|
mov edi, 0
|
|
|
|
nf0k: mov eax, edi
|
|
pop ebp
|
|
pop edi
|
|
pop esi
|
|
pop ecx
|
|
pop ebx
|
|
mov edx, nf_width
|
|
|
|
cmp eax, 2
|
|
jae nfdiag
|
|
or eax, eax
|
|
jz nf0_v
|
|
jmp nf0_h
|
|
|
|
nfdiag: jz nf0_d
|
|
jmp nf0_r
|
|
|
|
if 1 ; Newer versions of Vertical and Horizontal blend that use 0%,25%,50%,75%,100% instead of just 0%,50%,100%
|
|
|
|
; Vertical blend
|
|
; 0 1
|
|
; 01010101 1
|
|
; 00121013 2
|
|
; 02010311 3
|
|
; 20203131 4
|
|
; 02021313 5
|
|
; 23202331 6
|
|
; 20332123 7
|
|
;2 22233233 8
|
|
;
|
|
|
|
nf0_v:
|
|
; 3412 (low to high)
|
|
;------
|
|
mov al, bl ; 0101 (1)
|
|
mov ah, bh
|
|
shl eax, 16
|
|
mov al, bl
|
|
mov ah, bh
|
|
mov [edi], eax
|
|
mov [edi+4], eax ; 0101
|
|
add edi, edx
|
|
|
|
mov al, bh ; 0012 (2)
|
|
mov ah, cl
|
|
shl eax, 8
|
|
mov al, bl
|
|
mov ah, bl
|
|
mov [edi], eax
|
|
mov al, bh ; 1013
|
|
mov ah, ch
|
|
shl eax, 16
|
|
mov al, bh
|
|
mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
mov al, bl ; 0201 (3)
|
|
mov ah, bh
|
|
shl eax, 16
|
|
mov al, bl
|
|
mov ah, cl
|
|
mov [edi], eax
|
|
mov al, bh ; 0311
|
|
mov ah, bh
|
|
shl eax, 16
|
|
mov al, bl
|
|
mov ah, ch
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
mov al, cl ; 2020 (4), 0202 (5)
|
|
mov ah, bl
|
|
shl eax, 16
|
|
mov al, cl
|
|
mov ah, bl
|
|
mov [edi], eax
|
|
ror eax, 8
|
|
mov [edi+edx], eax
|
|
mov al, ch ; 3131, 1313
|
|
mov ah, bh
|
|
shl eax, 16
|
|
mov al, ch
|
|
mov ah, bh
|
|
mov [edi+4], eax
|
|
ror eax, 8
|
|
mov [edi+edx+4], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
mov al, cl ; 2320 (6)
|
|
mov ah, bl
|
|
shl eax, 16
|
|
mov al, cl
|
|
mov ah, ch
|
|
mov [edi], eax
|
|
mov al, ch ; 2331
|
|
mov ah, bh
|
|
shl eax, 16
|
|
mov al, cl
|
|
mov ah, ch
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
rol eax, 8 ; 2033 (7)
|
|
mov al, cl
|
|
mov ah, bl
|
|
mov [edi], eax
|
|
mov al, cl ; 2123
|
|
mov ah, ch
|
|
shl eax, 16
|
|
mov al, cl
|
|
mov ah, bh
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
mov ah, cl ; 2223 (8)
|
|
mov [edi], eax
|
|
mov al, ch ; 3233
|
|
mov ah, ch
|
|
shl eax, 16
|
|
mov al, ch
|
|
mov ah, cl
|
|
mov [edi+4], eax
|
|
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
retn
|
|
|
|
; Horizontal blend
|
|
; 0 1
|
|
; 00010111 1
|
|
; 20101301 2
|
|
; 02010131 3
|
|
; 21201033 4
|
|
; 02032113 5
|
|
; 20323321 6
|
|
; 02232313 7
|
|
;2 23223233 8
|
|
|
|
nf0_h:
|
|
; 3412 (low to high)
|
|
;------
|
|
mov al, bl ; 0001 (1)
|
|
mov ah, bh
|
|
shl eax, 16
|
|
mov al, bl
|
|
mov ah, bl
|
|
mov [edi], eax
|
|
mov al, bh ; 0111
|
|
mov ah, bh
|
|
rol eax, 16
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
ror eax, 8 ; 2010 (2)
|
|
mov al, cl
|
|
mov ah, bl
|
|
mov [edi], eax
|
|
rol eax, 8
|
|
mov al, bh ; 1301
|
|
mov ah, ch
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
mov al, bl ; 0201 (3)
|
|
mov ah, cl
|
|
mov [edi], eax
|
|
mov al, ch ; 0131
|
|
mov ah, bh
|
|
rol eax, 16
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
mov al, cl ; 2120 (4)
|
|
mov ah, bl
|
|
shl eax, 16
|
|
mov al, cl
|
|
mov ah, bh
|
|
mov [edi], eax
|
|
mov al, ch ; 1033
|
|
mov ah, ch
|
|
shl eax, 16
|
|
mov al, bh
|
|
mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
rol eax, 8 ; 0203 (5)
|
|
mov al, bl
|
|
mov ah, cl
|
|
mov [edi], eax
|
|
mov al, bh ; 2113
|
|
mov ah, ch
|
|
shl eax, 16
|
|
mov al, cl
|
|
mov ah, bh
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
ror eax, 8 ; 2032 (6)
|
|
mov al, cl
|
|
mov ah, bl
|
|
mov [edi], eax
|
|
mov al, bh ; 3321
|
|
mov ah, ch
|
|
ror eax, 8
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
mov al, cl ; 0223 (7)
|
|
mov ah, ch
|
|
shl eax, 16
|
|
mov al, bl
|
|
mov ah, cl
|
|
mov [edi], eax
|
|
mov al, bh ; 2313
|
|
mov ah, ch
|
|
rol eax, 16
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
shl eax, 16 ; 2322 (8)
|
|
mov al, cl
|
|
mov ah, ch
|
|
mov [edi], eax
|
|
mov al, ch ; 3233
|
|
mov ah, ch
|
|
shl eax, 16
|
|
mov al, ch
|
|
mov ah, cl
|
|
mov [edi+4], eax
|
|
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
retn
|
|
|
|
else
|
|
|
|
; Vertical blend
;
; Fills one 8x8 square with a fixed dither of four colours.
; Digits name colour registers: 0 = bl, 1 = bh, 2 = cl, 3 = ch.
;0        1
; 00101011 1
; 00010111 2
; 20203131 3
; 02021313 4
; 20203131 5
; 02021313 6
; 22323233 7
;2 22232333 8
;
; In:    edi = destination of the square's first row
;        edx = value added to edi after each row (row stride)
;        bl, bh, cl, ch = the four colours
; Out:   edi = edi + 7*edx - nfpk_back_right
;        (original note on nfpk_back_right: (SHEIGHT-1)*width+8)
; Clobb: eax, flags (ebp is used as scratch but saved and restored)
;
; Rows 3/5 and 4/6 are identical, so each is built once and stored twice
; (at edi and at edi + 2*edx).
nf0_v:
        push    ebp

        ; row 1: 0010 1011
        mov     ah, bl
        mov     al, bh
        shl     eax, 16
        mov     ah, bl
        mov     al, bl
        mov     [edi], eax
        mov     ah, bh
        mov     al, bh
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 2: 0001 0111 (left half reuses bytes of row 1)
        rol     eax, 8
        mov     ah, bl
        mov     al, bl
        mov     [edi], eax
        mov     ah, bh
        mov     al, bh
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; rows 3 and 5: 2020 3131
        mov     ah, bl
        mov     al, cl
        shl     eax, 16
        mov     ah, bl
        mov     al, cl
        mov     ebp, eax                ; keep left half for rows 4 and 6
        mov     [edi], eax
        mov     [edi+edx*2], eax
        mov     ah, bh
        mov     al, ch
        shl     eax, 16
        mov     ah, bh
        mov     al, ch
        mov     [edi+4], eax
        mov     [edi+edx*2+4], eax
        add     edi, edx

        ; rows 4 and 6: 0202 1313 (rows 3/5 rotated by one byte)
        rol     ebp, 8
        mov     [edi], ebp
        mov     [edi+edx*2], ebp
        rol     eax, 8
        mov     [edi+4], eax
        mov     [edi+edx*2+4], eax
        add     edi, edx                ; now at row 5 ...
        lea     edi, [edi+edx*2]        ; ... skip rows 5 and 6 (already written)

        ; row 7: 2232 3233
        mov     ah, cl
        mov     al, ch
        shl     eax, 16
        mov     ah, cl
        mov     al, cl
        mov     [edi], eax
        mov     ah, ch
        mov     al, ch
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 8: 2223 2333
        mov     ah, ch
        mov     al, cl
        shl     eax, 16
        mov     ah, cl
        mov     al, cl
        mov     [edi], eax
        mov     ah, ch
        mov     al, ch
        rol     eax, 16
        mov     [edi+4], eax

        pop     ebp

        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8
        retn
|
|
|
|
; Horizontal blend (second variant of nf0_h)
;
; Fills one 8x8 square with a fixed dither of four colours.
; Digits name colour registers: 0 = bl, 1 = bh, 2 = cl, 3 = ch.
;0        1
; 00101011 1
; 00010111 2
; 20101031 3
; 02010113 4
; 20323231 5
; 02232313 6
; 22323233 7
;2 22232333 8
;
; In:    edi = destination of the square's first row
;        edx = value added to edi after each row (row stride)
;        bl, bh, cl, ch = the four colours
; Out:   edi = edi + 7*edx - nfpk_back_right
;        (original note on nfpk_back_right: (SHEIGHT-1)*width+8)
; Clobb: eax, flags
;
; Several rows rotate the dword left in eax by the previous store, so the
; row order must not be changed.
nf0_h:
        ; row 1: 0010 1011
        mov     ah, bl
        mov     al, bh
        shl     eax, 16
        mov     ah, bl
        mov     al, bl
        mov     [edi], eax
        mov     ah, bh
        mov     al, bh
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 2: 0001 0111 (left half reuses bytes of row 1)
        rol     eax, 8
        mov     ah, bl
        mov     al, bl
        mov     [edi], eax
        mov     ah, bh
        mov     al, bh
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 3: 2010 1031
        ror     eax, 8
        mov     ah, bl
        mov     al, cl
        mov     [edi], eax
        mov     ah, bh
        mov     al, ch
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 4: 0201 0113
        mov     ah, bh
        mov     al, bl
        rol     eax, 16
        mov     ah, cl
        mov     al, bl
        mov     [edi], eax
        mov     ah, ch
        mov     al, bh
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 5: 2032 3231
        mov     ah, cl
        mov     al, ch
        shl     eax, 16
        mov     ah, bl
        mov     al, cl
        mov     [edi], eax
        mov     ah, bh
        mov     al, ch
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 6: 0223 2313 (left half reuses bytes of row 5)
        rol     eax, 8
        mov     ah, cl
        mov     al, bl
        mov     [edi], eax
        mov     ah, ch
        mov     al, bh
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 7: 2232 3233
        mov     ah, cl
        mov     al, ch
        shl     eax, 16
        mov     ah, cl
        mov     al, cl
        mov     [edi], eax
        mov     ah, ch
        mov     al, ch
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 8: 2223 2333
        mov     ah, ch
        mov     al, cl
        shl     eax, 16
        mov     ah, cl
        mov     al, cl
        mov     [edi], eax
        mov     ah, ch
        mov     al, ch
        rol     eax, 16
        mov     [edi+4], eax

        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8
        retn
|
|
endif
|
|
|
|
; \ Diagonal blend
;
; Fills one 8x8 square with a fixed dither of four colours.
; Digits name colour registers: 0 = bl, 1 = bh, 2 = cl, 3 = ch.
; Pattern table as given by the original author:
;0        1
; 00010101 1
; 00001313 2
; 20303101 3
; 02030313 4
; 23203031 5
; 02020333 6
; 23232333 7
;2 22023233 8
;
; In:    edi = destination of the square's first row
;        edx = value added to edi after each row (row stride)
;        bl, bh, cl, ch = the four colours
; Out:   edi = edi + 7*edx - nfpk_back_right
;        (original note on nfpk_back_right: (SHEIGHT-1)*width+8)
; Clobb: eax, flags
;
; Some rows patch only one or two bytes of the dword left in eax by the
; previous store, so the row order must not be changed.
; NOTE(review): the right half of row 6 is stored as 3303, while the table
; lists 0333 -- confirm which one is intended before touching the code.
nf0_d:
        ; row 1: 0001 0101
        mov     ah, bh
        mov     al, bl
        shl     eax, 16
        mov     ah, bl
        mov     al, bl
        mov     [edi], eax
        mov     ah, bh                  ; 0001 -> 0101
        mov     [edi+4], eax
        add     edi, edx

        ; row 2: 0000 1313
        mov     ah, bl                  ; 0101 -> 0001
        rol     eax, 16                 ;      -> 0100
        mov     ah, bl                  ;      -> 0000
        mov     [edi], eax
        mov     ah, ch
        mov     al, bh
        shl     eax, 16
        mov     ah, ch
        mov     al, bh
        mov     [edi+4], eax
        add     edi, edx

        ; row 3: 2030 3101
        mov     ah, bl
        mov     al, ch
        shl     eax, 16
        mov     ah, bl
        mov     al, cl
        mov     [edi], eax
        mov     ah, bh
        mov     al, bl
        shl     eax, 16
        mov     ah, bh
        mov     al, ch
        mov     [edi+4], eax
        add     edi, edx

        ; row 4: 0203 0313
        mov     ah, ch
        mov     al, bl
        shl     eax, 16
        mov     ah, cl
        mov     al, bl
        mov     [edi], eax
        mov     ah, ch
        mov     al, bh
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 5: 2320 3031
        mov     ah, bl
        mov     al, cl
        shl     eax, 16
        mov     ah, ch
        mov     al, cl
        mov     [edi], eax
        mov     ah, bh
        mov     al, ch
        shl     eax, 16
        mov     ah, bl
        mov     al, ch
        mov     [edi+4], eax
        add     edi, edx

        ; row 6: stores 0202 3303 (table lists 0333 for the right half)
        mov     ah, cl
        mov     al, bl
        shl     eax, 16
        mov     ah, cl
        mov     al, bl
        mov     [edi], eax
        mov     ah, ch
        shl     eax, 16
        mov     ah, ch
        mov     al, ch
        mov     [edi+4], eax
        add     edi, edx

        ; row 7: 2323 2333 (built from the bytes left by row 6)
        mov     al, cl
        rol     eax, 16
        mov     al, cl
        mov     [edi], eax
        mov     al, ch
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 8: 2202 3233
        mov     ah, cl
        mov     al, bl
        shl     eax, 16
        mov     ah, cl
        mov     al, cl
        mov     [edi], eax
        mov     ah, ch
        mov     al, ch
        shl     eax, 16
        mov     ah, cl
        mov     al, ch
        mov     [edi+4], eax

        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8
        retn
|
|
|
|
|
|
; / RDiagonal blend
;
; Fills one 8x8 square with a fixed dither of four colours.
; Digits name colour registers: 0 = bl, 1 = bh, 2 = cl, 3 = ch.
;0        1
; 01010111 1
; 20201111 2
; 01021313 3
; 20212131 4
; 02121323 5
; 22213131 6
; 22232323 7
;2 22323133 8
;
; In:    edi = destination of the square's first row
;        edx = value added to edi after each row (row stride)
;        bl, bh, cl, ch = the four colours
; Out:   edi = edi + 7*edx - nfpk_back_right
;        (original note on nfpk_back_right: (SHEIGHT-1)*width+8)
; Clobb: eax, flags
;
; Some rows reuse bytes of the dword left in eax by the previous store,
; so the row order must not be changed.
nf0_r:
        ; row 1: 0101 0111
        mov     ah, bh
        mov     al, bl
        shl     eax, 16
        mov     ah, bh
        mov     al, bl
        mov     [edi], eax
        mov     al, bh                  ; 0101 -> 1101
        rol     eax, 16                 ;      -> 0111
        mov     [edi+4], eax
        add     edi, edx

        ; row 2: 2020 1111
        mov     ah, bl
        mov     al, cl
        shl     eax, 16
        mov     ah, bl
        mov     al, cl
        mov     [edi], eax
        mov     ah, bh
        mov     al, bh
        shl     eax, 16
        mov     ah, bh
        mov     al, bh
        mov     [edi+4], eax
        add     edi, edx

        ; row 3: 0102 1313 (byte 1 of the left half comes from row 2)
        mov     ah, cl
        mov     al, bl
        rol     eax, 16
        mov     al, bl
        mov     [edi], eax
        mov     ah, ch
        mov     al, bh
        shl     eax, 16
        mov     ah, ch
        mov     al, bh
        mov     [edi+4], eax
        add     edi, edx

        ; row 4: 2021 2131
        mov     ah, bh
        mov     al, cl
        shl     eax, 16
        mov     ah, bl
        mov     al, cl
        mov     [edi], eax
        mov     ah, bh
        mov     al, ch
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 5: 0212 1323 (left half reuses bytes of row 4)
        ror     eax, 8
        mov     ah, cl
        mov     al, bl
        mov     [edi], eax
        mov     ah, ch
        mov     al, cl
        shl     eax, 16
        mov     ah, ch
        mov     al, bh
        mov     [edi+4], eax
        add     edi, edx

        ; row 6: 2221 3131
        mov     ah, bh
        mov     al, cl
        shl     eax, 16
        mov     ah, cl
        mov     al, cl
        mov     [edi], eax
        mov     ah, bh
        mov     al, ch
        shl     eax, 16
        mov     ah, bh
        mov     al, ch
        mov     [edi+4], eax
        add     edi, edx

        ; row 7: 2223 2323
        mov     ah, ch
        mov     al, cl
        shl     eax, 16
        mov     ah, cl
        mov     al, cl
        mov     [edi], eax
        mov     ah, ch                  ; 2223 -> 2323
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

        ; row 8: 2232 3133 (left half reuses bytes of row 7)
        rol     eax, 8
        mov     al, cl
        mov     [edi], eax
        mov     ah, ch
        mov     al, ch
        shl     eax, 16
        mov     ah, bh
        mov     al, ch
        mov     [edi+4], eax

        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8
        retn
|
|
|
|
elseif INTERP eq 1
|
|
|
|
nf0:    ; Interpolated square (consumes 1 byte of the data stream)
        ;
        ; Gathers four colours and fills the 8x8 square at edi with a
        ; fixed dither of them.  With E = edi on entry and w = nf_width:
        ;   bl (0) = byte at E - 2*w - 1
        ;   bh (1) = byte at E - 2*w + 7
        ;   cl (2) = byte at E + 6*w - 1
        ;   ch (3) = next byte of the data stream at esi
        ; NOTE(review): the two-row back-off is marked "xxx" by the
        ; original author -- the intended sample positions are not
        ; documented here; confirm before changing.
        ;
        ; In:    edi = destination of the square's first row, esi = data
        ; Out:   esi advanced by 1
        ;        edi = E + 7*w - nfpk_back_right
        ;        (original note on nfpk_back_right: (SHEIGHT-1)*width+8)
        ; Clobb: eax, ebx, ecx, edx (edx = nf_width), flags
        mov     edx, nf_width
        sub     edi, edx                ; step two rows up ...
        sub     edi, edx                ;xxx
        mov     bl, [edi-1]
        mov     bh, [edi+7]
        mov     cl, [edi+edx*8-1]
        mov     ch, [esi]
        inc     esi
        add     edi, edx                ;xxx
        add     edi, edx                ; ... and back to the first row

; Pattern table as given by the original author
; (digits: 0 = bl, 1 = bh, 2 = cl, 3 = ch):
;0        1
; 00101011 1
; 00010111 2
; 20023113 3
; 02101031 4
; 20323213 5
; 02201331 6
; 22232333 7
;2 22323233 8
; NOTE(review): row 5 is actually stored as 2023 2313, not 2032 3213 as
; listed -- confirm which one is intended before touching the code.
; Rows 2 and 8 rotate the dword left in eax by the previous row, so the
; row order must not be changed.

nf0_1:  ; row 1: 0010 1011
        mov     ah, bl
        mov     al, bh
        shl     eax, 16
        mov     ah, bl
        mov     al, bl
        mov     [edi], eax
        mov     ah, bh
        mov     al, bh
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

nf0_2:  ; row 2: 0001 0111 (left half reuses bytes of row 1)
        rol     eax, 8
        mov     ah, bl
        mov     al, bl
        mov     [edi], eax
        mov     ah, bh
        mov     al, bh
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

nf0_3:  ; row 3: 2002 3113
        mov     ah, cl
        mov     al, bl
        shl     eax, 16
        mov     ah, bl
        mov     al, cl
        mov     [edi], eax
        mov     ah, ch
        mov     al, bh
        shl     eax, 16
        mov     ah, bh
        mov     al, ch
        mov     [edi+4], eax
        add     edi, edx

nf0_4:  ; row 4: 0210 1031
        mov     ah, bl
        mov     al, bh
        shl     eax, 16
        mov     ah, cl
        mov     al, bl
        mov     [edi], eax
        mov     ah, bh
        mov     al, ch
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

nf0_5:  ; row 5: stores 2023 2313 (table lists 2032 3213)
        mov     ah, ch
        mov     al, cl
        shl     eax, 16
        mov     ah, bl
        mov     al, cl
        mov     [edi], eax
        mov     ah, ch
        mov     al, bh
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

nf0_6:  ; row 6: 0220 1331
        mov     ah, bl
        mov     al, cl
        shl     eax, 16
        mov     ah, cl
        mov     al, bl
        mov     [edi], eax
        mov     ah, bh
        mov     al, ch
        shl     eax, 16
        mov     ah, ch
        mov     al, bh
        mov     [edi+4], eax
        add     edi, edx

nf0_7:  ; row 7: 2223 2333
        mov     ah, ch
        mov     al, cl
        shl     eax, 16
        mov     ah, cl
        mov     al, cl
        mov     [edi], eax
        mov     ah, ch
        mov     al, ch
        rol     eax, 16
        mov     [edi+4], eax
        add     edi, edx

nf0_8:  ; row 8: 2232 3233 (left half reuses bytes of row 7)
        ror     eax, 8
        mov     ah, cl
        mov     al, cl
        mov     [edi], eax
        mov     ah, ch
        mov     al, ch
        rol     eax, 16
        mov     [edi+4], eax

        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8

        retn
|
|
|
|
endif
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf1: ; No change (and copied to screen)
; Opcode handler: the square is left untouched; only the destination
; pointer edi moves on by SWIDTH bytes (one square to the right).
; Reads no stream data (esi is not touched). Modifies edi and flags only.
|
|
add edi, SWIDTH
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf2:    ; Near shift from older part of current buffer
        ;
        ; Reads one byte from the stream, looks up a packed (x, y) shift
        ; in nfpk_ShiftP2 (word table, x in the low byte, y in the high
        ; byte) and falls into nf_xyc_shift.
        ; In:  esi = data stream.  Out: esi advanced by 1.
        movzx   eax, byte ptr [esi]     ; table index, upper bits cleared
        inc     esi
        mov     ax, nfpk_ShiftP2[eax*2]

nf_xyc_shift:
        ; Shared tail: copy from the current buffer.
        ; In:  al = x shift (taken as signed), ah = y shift (used as an
        ;      unsigned index into the dword table nfpk_ShiftY)
        ; Out: eax = signed x + nfpk_ShiftY[y], passed on to nf_shift as
        ;      the source offset relative to edi; ebx = y index
        movzx   ebx, ah                 ; must read ah before eax is rewritten
        movsx   eax, al
        add     eax, nfpk_ShiftY[ebx*4]
        jmp     nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf3:    ; Near shift from newer part of current buffer
        ;
        ; Same table lookup as nf2, but both components of the packed
        ; shift are negated before continuing at nf_xyc_shift.
        ; In:  esi = data stream.  Out: esi advanced by 1.
        movzx   eax, byte ptr [esi]     ; table index, upper bits cleared
        inc     esi
        mov     ax, nfpk_ShiftP2[eax*2]
        neg     ah                      ; y = -y
        neg     al                      ; x = -x
        jmp     nf_xyc_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf4:    ; Near shift from previous buffer
        ;
        ; Reads one byte from the stream, looks up a packed (x, y) shift
        ; in nfpk_ShiftP1 (word table) and continues at nf_xyp_shift.
        ; In:  esi = data stream.  Out: esi advanced by 1.
        movzx   eax, byte ptr [esi]     ; table index, upper bits cleared
        inc     esi
        mov     ax, nfpk_ShiftP1[eax*2]
        jmp     nf_xyp_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf5:    ; Far shift from previous buffer
        ;
        ; The (x, y) shift is taken directly from the stream: two bytes,
        ; x first, then y.
        ; In:  esi = data stream.  Out: esi advanced by 2.
        mov     ax, [esi]
        add     esi, 2

nf_xyp_shift:
        ; Shared tail: copy from the previous buffer.
        ; In:  al = x shift (taken as signed), ah = y shift (used as an
        ;      unsigned index into the dword table nfpk_ShiftY)
        ; Out: eax = signed x + nfpk_ShiftY[y] + DiffBufPtrs, passed on to
        ;      nf_shift as the source offset relative to edi; ebx = y index
        ; DiffBufPtrs is presumably the distance from the current to the
        ; previous frame buffer -- confirm at its definition.
        movzx   ebx, ah                 ; must read ah before eax is rewritten
        movsx   eax, al
        add     eax, nfpk_ShiftY[ebx*4]
        add     eax, DiffBufPtrs
        jmp     nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
|
|
if COMPOPS
|
|
|
|
nf6:    ; Run of no changes (must only appear in first nibble opcodes)
        ;
        ; Next nibble k specifies 2k+4 squares with no changes.
        ; ebx is expected to hold k on entry (TODO confirm against the
        ; dispatcher, which is outside this routine).
        ; Skips the squares two at a time, wrapping to the next row of
        ; squares whenever wcnt runs out.
        ; Modifies: edi, ebx, eax, esp, wcnt, h, flags
        add     esp, 4                  ; drop one dword from the stack: the next
                                        ; nibble is not an opcode (presumably this
                                        ; discards its pending handler -- confirm)
        add     ebx, 2                  ; k+2 pairs, i.e. at least 4 squares
        ALIGN 4
nf6a:
        add     edi, SWIDTH*2           ; step over one pair of squares
        dec     ebx
        jz      nf6z                    ; that was the last pair
        dec     wcnt
        jns     nf6a                    ; still inside the same row

        ; Row finished: reload the per-row pair counter and move on.
        mov     eax, w
        shr     eax, 1
        dec     eax
        mov     wcnt, eax               ; wcnt = w/2 - 1
        dec     h                       ; row count (should never become zero here)
        add     edi, new_row            ; advance to next row
        jmp     nf6a

nf6z:
        retn
|
|
|
|
else
|
|
|
|
nf6:    ; Far shift from current buffer
        ;
        ; The (x, y) shift is taken directly from the stream (two bytes,
        ; x first, then y) and handed to nf_xyc_shift.
        ; In:  esi = data stream.  Out: esi advanced by 2.
        add     esi, 2
        mov     ax, [esi-2]
        jmp     nf_xyc_shift
|
|
|
|
endif
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; Copies one 8-byte row from [esi] to [edi] through eax.
; The first dword is stored before the second one is loaded; when source
; and destination overlap within a row this order decides the result, so
; it must be kept.
NF_SHIFT_COPY8 MACRO
        mov     eax, [esi]
        mov     [edi], eax
        mov     eax, [esi+4]
        mov     [edi+4], eax
ENDM

nf_shift:
        ; Copy an 8x8 square from edi+eax to edi.
        ; In:    edi = destination square, eax = byte offset of the source
        ;        square relative to edi, esi = data stream (preserved)
        ; Out:   edi = edi + 7*nf_width - nfpk_back_right
        ;        (original note on nfpk_back_right: (SHEIGHT-1)*width+8)
        ; Clobb: eax, ebx, edx (edx = nf_width), flags
if 0 ;debug
        mov     eax, 0
        mov     ebx, eax
        jmp     nf_solid
endif
        mov     ebx, esi                ; park the stream pointer
        mov     edx, nf_width           ; row stride
        lea     esi, [edi+eax]          ; source square

        REPEAT SHEIGHT-1                ; all rows but the last
        NF_SHIFT_COPY8
        add     edi, edx
        add     esi, edx
        ENDM
        NF_SHIFT_COPY8                  ; last row, pointers stay on it

        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8
        mov     esi, ebx                ; stream pointer back
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; Helper macros for nf7 (they expand to exactly the instruction
; sequences that were previously written out by hand).
;
; NF7_PATCH: take the mask byte at [esi+maskOfs], fetch its dword from
; the table at ecx and write the four table bytes into the code at the
; four given labels (edx = anchor+2, so each write lands 2 bytes into the
; labelled instruction).
NF7_PATCH MACRO maskOfs, anchor, lblA, lblB, lblC, lblD
        mov     al, [esi+maskOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(lblA-anchor)], bl
        mov     [edx+(lblB-anchor)], bh
        shr     ebx, 16
        mov     [edx+(lblC-anchor)], bl
        mov     [edx+(lblD-anchor)], bh
ENDM

; NF7_BUILD: two patched "mov ax, <reg>" instructions that assemble four
; pixels in eax; the first pair ends up in the upper 16 bits.
NF7_BUILD MACRO lblHi, lblLo
lblHi:  mov     ax, bx
        shl     eax, 16
lblLo:  mov     ax, bx
ENDM

nf7:    ; 8x8x1 (10 bytes)
        ;
        ; Stream layout: 2 colour bytes, then 8 mask bytes (one per row).
        ; If the first colour byte is above the second (unsigned) the
        ; data is handled by nf23 instead.
        ;
        ; This routine is self-modifying: byte +2 of every instruction
        ; labelled nf7_RC below is overwritten with a byte taken from
        ; nfpk_mov8[mask] before the row code runs (presumably selecting
        ; bx, dx, cx or bp as the source register -- confirm against the
        ; table's definition).
        ;
        ; In:    esi = data stream, edi = destination square
        ; Out:   esi advanced by 10
        ;        edi = edi + 7*nf_width - nfpk_back_right
        ;        (original note on nfpk_back_right: (SHEIGHT-1)*width+8)
        ; Clobb: eax, ebx, ecx, edx, flags (ebp is saved and restored)
        mov     ax, [esi]
        cmp     al, ah
        ja      nf23

if 0 ;debug
        add     esi, 10
        mov     eax, 0fefefefeH
        mov     ebx, eax
        jmp     nf_solid
endif
        xor     eax, eax                ; eax = table index, upper bits stay 0
        lea     ecx, nfpk_mov8
        lea     edx, byte ptr ds:nf7_11+2

        NF7_PATCH 2, nf7_11, nf7_11, nf7_12, nf7_13, nf7_14
        NF7_PATCH 3, nf7_11, nf7_21, nf7_22, nf7_23, nf7_24
        NF7_PATCH 4, nf7_11, nf7_31, nf7_32, nf7_33, nf7_34
        NF7_PATCH 5, nf7_11, nf7_41, nf7_42, nf7_43, nf7_44

        lea     edx, [edx+(nf7_51-nf7_11)]      ; re-anchor at nf7_51+2

        NF7_PATCH 6, nf7_51, nf7_51, nf7_52, nf7_53, nf7_54
        NF7_PATCH 7, nf7_51, nf7_61, nf7_62, nf7_63, nf7_64
        NF7_PATCH 8, nf7_51, nf7_71, nf7_72, nf7_73, nf7_74
        NF7_PATCH 9, nf7_51, nf7_81, nf7_82, nf7_83, nf7_84

        push    ebp
        push    esi
        ; load bx,dx,cx,bp with 00,01,10,11 color combinations
        ; (note that bits are read least significant first).
        mov     cx, [esi]
        mov     esi, nf_width           ; esi doubles as the row stride below
        mov     bl, cl
        mov     bh, cl
        mov     dl, ch
        mov     dh, cl
        mov     al, ch
        mov     ah, ch
        mov     ebp, eax
        jmp     nf7_0                   ; flush prefetch
        ALIGN 4
nf7_0:
        ; row 1
        NF7_BUILD nf7_11, nf7_12
        mov     [edi], eax
        NF7_BUILD nf7_13, nf7_14
        mov     [edi+4], eax
        add     edi, esi
        ; row 2
        NF7_BUILD nf7_21, nf7_22
        mov     [edi], eax
        NF7_BUILD nf7_23, nf7_24
        mov     [edi+4], eax
        add     edi, esi
        ; row 3
        NF7_BUILD nf7_31, nf7_32
        mov     [edi], eax
        NF7_BUILD nf7_33, nf7_34
        mov     [edi+4], eax
        add     edi, esi
        ; row 4
        NF7_BUILD nf7_41, nf7_42
        mov     [edi], eax
        NF7_BUILD nf7_43, nf7_44
        mov     [edi+4], eax
        add     edi, esi
        ; row 5
        NF7_BUILD nf7_51, nf7_52
        mov     [edi], eax
        NF7_BUILD nf7_53, nf7_54
        mov     [edi+4], eax
        add     edi, esi
        ; row 6
        NF7_BUILD nf7_61, nf7_62
        mov     [edi], eax
        NF7_BUILD nf7_63, nf7_64
        mov     [edi+4], eax
        add     edi, esi
        ; row 7
        NF7_BUILD nf7_71, nf7_72
        mov     [edi], eax
        NF7_BUILD nf7_73, nf7_74
        mov     [edi+4], eax
        add     edi, esi
        ; row 8 (edi stays on the last row)
        NF7_BUILD nf7_81, nf7_82
        mov     [edi], eax
        NF7_BUILD nf7_83, nf7_84
        mov     [edi+4], eax

        pop     esi
        pop     ebp
        add     esi, 10
        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf7+16
|
|
; Helper macros for nf23 (they expand to exactly the instruction
; sequences that were previously written out by hand).
;
; NF23_PATCH: fetch the table dword for the 4-bit index in eax and write
; its four bytes into the code at the four given labels (edx = nf23_11+2,
; so each write lands 2 bytes into the labelled instruction).
NF23_PATCH MACRO lblA, lblB, lblC, lblD
        mov     ebx, [ecx+eax*4]
        mov     [edx+(lblA-nf23_11)], bl
        mov     [edx+(lblB-nf23_11)], bh
        shr     ebx, 16
        mov     [edx+(lblC-nf23_11)], bl
        mov     [edx+(lblD-nf23_11)], bh
ENDM

; NF23_BUILD: two patched "mov ax, <reg>" instructions that assemble four
; pixels in eax; the first pair ends up in the upper 16 bits.
NF23_BUILD MACRO lblHi, lblLo
lblHi:  mov     ax, bx
        shl     eax, 16
lblLo:  mov     ax, bx
ENDM

nf23:   ; low 4x4x1 (4 bytes)
        ;
        ; Reached from nf7 when the first colour byte is above the second.
        ; Stream layout: 2 colour bytes, then 2 mask bytes.  Each mask
        ; nibble (low nibble first) drives two identical output rows.
        ;
        ; Self-modifying: byte +2 of every instruction labelled nf23_RC
        ; is overwritten with a byte from nfpk_mov4l[nibble] before the
        ; row code runs (presumably selecting bx or cx as the source
        ; register -- confirm against the table's definition).
        ;
        ; In:    esi = data stream, edi = destination square
        ; Out:   esi advanced by 4
        ;        edi = edi + 7*nf_width - nfpk_back_right
        ; Clobb: eax, ebx, ecx, edx (edx = nf_width), flags
        xor     eax, eax                ; eax = table index, upper bits stay 0
        lea     ecx, nfpk_mov4l
        lea     edx, byte ptr ds:nf23_11+2

        mov     al, [esi+2]             ; first mask byte, low nibble
        and     al, 0fH
        NF23_PATCH nf23_11, nf23_12, nf23_13, nf23_14

        mov     al, [esi+2]             ; first mask byte, high nibble
        shr     al, 4
        NF23_PATCH nf23_31, nf23_32, nf23_33, nf23_34

        mov     al, [esi+3]             ; second mask byte, low nibble
        and     al, 0fH
        NF23_PATCH nf23_51, nf23_52, nf23_53, nf23_54

        mov     al, [esi+3]             ; second mask byte, high nibble
        shr     al, 4
        NF23_PATCH nf23_71, nf23_72, nf23_73, nf23_74

        mov     edx, nf_width           ; edx now serves as the row stride

        ; load bx,cx with 00,11 color combinations
        mov     bx, [esi]
        mov     cl, bh
        mov     bh, bl
        mov     ch, cl

        jmp     nf23_0                  ; flush prefetch
        ALIGN 4
nf23_0:
        ; rows 1 and 2
        NF23_BUILD nf23_11, nf23_12
        mov     [edi], eax
        mov     [edi+edx], eax
        NF23_BUILD nf23_13, nf23_14
        mov     [edi+4], eax
        mov     [edi+edx+4], eax
        lea     edi, [edi+edx*2]

        ; rows 3 and 4
        NF23_BUILD nf23_31, nf23_32
        mov     [edi], eax
        mov     [edi+edx], eax
        NF23_BUILD nf23_33, nf23_34
        mov     [edi+4], eax
        mov     [edi+edx+4], eax
        lea     edi, [edi+edx*2]

        ; rows 5 and 6
        NF23_BUILD nf23_51, nf23_52
        mov     [edi], eax
        mov     [edi+edx], eax
        NF23_BUILD nf23_53, nf23_54
        mov     [edi+4], eax
        mov     [edi+edx+4], eax
        lea     edi, [edi+edx*2]

        ; rows 7 and 8 (edi is left on row 8)
        NF23_BUILD nf23_71, nf23_72
        mov     [edi], eax
        mov     [edi+edx], eax
        NF23_BUILD nf23_73, nf23_74
        mov     [edi+4], eax
        add     edi, edx
        mov     [edi+4], eax

        sub     edi, nfpk_back_right
        add     esi, 4
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; Helper macros for nf8 (they expand to exactly the instruction
; sequences that were previously written out by hand).
;
; NF8_PATCH: take the mask byte at [esi+maskOfs], fetch its dword from
; the table at ecx and write the four table bytes into the code at the
; four given labels (edx = anchor+2, so each write lands 2 bytes into the
; labelled instruction).
NF8_PATCH MACRO maskOfs, anchor, lblA, lblB, lblC, lblD
        mov     al, [esi+maskOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(lblA-anchor)], bl
        mov     [edx+(lblB-anchor)], bh
        shr     ebx, 16
        mov     [edx+(lblC-anchor)], bl
        mov     [edx+(lblD-anchor)], bh
ENDM

; NF8_BUILD: two patched "mov ax, <reg>" instructions that assemble four
; pixels in eax; the first pair ends up in the upper 16 bits.
NF8_BUILD MACRO lblHi, lblLo
lblHi:  mov     ax, bx
        shl     eax, 16
lblLo:  mov     ax, bx
ENDM

; NF8_SPREAD: from the colour pair in cx build the register set
; bx,dx,cx,bp = 00,01,10,11 color combinations
; (note that bits are read least significant first).
NF8_SPREAD MACRO
        mov     bl, cl
        mov     bh, cl
        mov     dl, ch
        mov     dh, cl
        mov     al, ch
        mov     ah, ch
        mov     ebp, eax
ENDM

nf8:    ; 2x2 4x4x1 (16 bytes)
        ;
        ; Stream layout: four groups of (2 colour bytes, 2 mask bytes).
        ; The groups are drawn as 4x4 quadrants in the order top-left,
        ; bottom-left, top-right, bottom-right; each mask byte covers two
        ; 4-pixel rows.  If the first colour byte is above the second
        ; (unsigned) the data is handled by nf24 instead.
        ;
        ; Self-modifying: byte +2 of every instruction labelled nf8_RC is
        ; overwritten with a byte from nfpk_mov8[mask] before the row
        ; code runs (presumably selecting bx, dx, cx or bp as the source
        ; register -- confirm against the table's definition).
        ;
        ; In:    esi = data stream, edi = destination square
        ; Out:   esi advanced by 16
        ;        edi = edi + 7*nf_width - nfpk_back_right
        ;        (original note on nfpk_back_right: (SHEIGHT-1)*width+8)
        ; Clobb: eax, ebx, ecx, edx, flags (ebp is saved and restored)
        mov     ax, [esi]
        cmp     al, ah
        ja      nf24

        xor     eax, eax                ; eax = table index, upper bits stay 0
        lea     ecx, nfpk_mov8
        lea     edx, byte ptr ds:nf8_11+2

        NF8_PATCH 2, nf8_11, nf8_11, nf8_12, nf8_13, nf8_14
        NF8_PATCH 3, nf8_11, nf8_21, nf8_22, nf8_23, nf8_24
        NF8_PATCH 6, nf8_11, nf8_31, nf8_32, nf8_33, nf8_34
        NF8_PATCH 7, nf8_11, nf8_41, nf8_42, nf8_43, nf8_44

        add     edx, nf8_51-nf8_11      ; re-anchor at nf8_51+2

        NF8_PATCH 10, nf8_51, nf8_51, nf8_52, nf8_53, nf8_54
        NF8_PATCH 11, nf8_51, nf8_61, nf8_62, nf8_63, nf8_64
        NF8_PATCH 14, nf8_51, nf8_71, nf8_72, nf8_73, nf8_74
        NF8_PATCH 15, nf8_51, nf8_81, nf8_82, nf8_83, nf8_84

        push    ebp
        push    esi                     ; [esp] = start of this opcode's data
        mov     cx, [esi]               ; colours of the first quadrant
        mov     esi, nf_width           ; esi doubles as the row stride below
        NF8_SPREAD

        jmp     nf8_0                   ; flush prefetch
        ALIGN 4
nf8_0:
        ; --- quadrant 1: left half, rows 1-4 ---
        NF8_BUILD nf8_11, nf8_12
        mov     [edi], eax
        add     edi, esi
        NF8_BUILD nf8_13, nf8_14
        mov     [edi], eax
        add     edi, esi

        NF8_BUILD nf8_21, nf8_22
        mov     [edi], eax
        add     edi, esi
        NF8_BUILD nf8_23, nf8_24
        mov     [edi], eax
        add     edi, esi

        ; --- quadrant 2: left half, rows 5-8 ---
        mov     eax, [esp]
        mov     cx, [eax+4]
        NF8_SPREAD

        NF8_BUILD nf8_31, nf8_32
        mov     [edi], eax
        add     edi, esi
        NF8_BUILD nf8_33, nf8_34
        mov     [edi], eax
        add     edi, esi

        NF8_BUILD nf8_41, nf8_42
        mov     [edi], eax
        add     edi, esi
        NF8_BUILD nf8_43, nf8_44
        mov     [edi], eax
        add     edi, esi

        lea     eax, [esi*8-4]          ; back up 8 rows, step 4 bytes right
        sub     edi, eax

        ; --- quadrant 3: right half, rows 1-4 ---
        mov     eax, [esp]
        mov     cx, [eax+8]
        NF8_SPREAD

        NF8_BUILD nf8_51, nf8_52
        mov     [edi], eax
        add     edi, esi
        NF8_BUILD nf8_53, nf8_54
        mov     [edi], eax
        add     edi, esi

        NF8_BUILD nf8_61, nf8_62
        mov     [edi], eax
        add     edi, esi
        NF8_BUILD nf8_63, nf8_64
        mov     [edi], eax
        add     edi, esi

        ; --- quadrant 4: right half, rows 5-8 ---
        mov     eax, [esp]
        mov     cx, [eax+12]
        NF8_SPREAD

        NF8_BUILD nf8_71, nf8_72
        mov     [edi], eax
        add     edi, esi
        NF8_BUILD nf8_73, nf8_74
        mov     [edi], eax
        add     edi, esi

        NF8_BUILD nf8_81, nf8_82
        mov     [edi], eax
        add     edi, esi
        NF8_BUILD nf8_83, nf8_84
        mov     [edi], eax              ; last row, edi stays on it

        pop     esi
        pop     ebp
        add     esi, 16
        sub     edi, 4                  ; undo the step into the right half
        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+16
|
|
; Helper macros for nf24 (they expand to exactly the instruction
; sequences that were previously written out by hand).
;
; NF24_PATCH: take the mask byte at [esi+maskOfs], fetch its dword from
; the table at ecx and write the four table bytes into the code at the
; four given labels (edx = anchor+2, so each write lands 2 bytes into the
; labelled instruction).
NF24_PATCH MACRO maskOfs, anchor, lblA, lblB, lblC, lblD
        mov     al, [esi+maskOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(lblA-anchor)], bl
        mov     [edx+(lblB-anchor)], bh
        shr     ebx, 16
        mov     [edx+(lblC-anchor)], bl
        mov     [edx+(lblD-anchor)], bh
ENDM

; NF24_BUILD: two patched "mov ax, <reg>" instructions that assemble four
; pixels in eax; the first pair ends up in the upper 16 bits.
NF24_BUILD MACRO lblHi, lblLo
lblHi:  mov     ax, bx
        shl     eax, 16
lblLo:  mov     ax, bx
ENDM

; NF24_SPREAD: from the colour pair in cx build the register set
; bx,dx,cx,bp = 00,01,10,11 color combinations
; (note that bits are read least significant first).
NF24_SPREAD MACRO
        mov     bl, cl
        mov     bh, cl
        mov     dl, ch
        mov     dh, cl
        mov     al, ch
        mov     ah, ch
        mov     ebp, eax
ENDM

nf24:   ; 2x1 4x8x1 (12 bytes)
        ;
        ; Reached from nf8 when the first colour byte is above the second.
        ; Stream layout: two groups of (2 colour bytes, 4 mask bytes).
        ; The first group fills the left 4x8 half, the second the right
        ; half; each mask byte covers two 4-pixel rows.  If byte 6 is
        ; above byte 7 (unsigned) the data is handled by nf40 instead.
        ;
        ; Self-modifying: byte +2 of every instruction labelled nf24_RC
        ; is overwritten with a byte from nfpk_mov8[mask] before the row
        ; code runs (presumably selecting bx, dx, cx or bp as the source
        ; register -- confirm against the table's definition).
        ;
        ; In:    esi = data stream, edi = destination square
        ; Out:   esi advanced by 12
        ;        edi = edi + 7*nf_width - nfpk_back_right
        ;        (original note on nfpk_back_right: (SHEIGHT-1)*width+8)
        ; Clobb: eax, ebx, ecx, edx, flags (ebp is saved and restored)
        mov     ax, [esi+6]
        cmp     al, ah
        ja      nf40

        xor     eax, eax                ; eax = table index, upper bits stay 0
        lea     ecx, nfpk_mov8
        lea     edx, byte ptr ds:nf24_11+2

        NF24_PATCH 2, nf24_11, nf24_11, nf24_12, nf24_13, nf24_14
        NF24_PATCH 3, nf24_11, nf24_21, nf24_22, nf24_23, nf24_24
        NF24_PATCH 4, nf24_11, nf24_31, nf24_32, nf24_33, nf24_34
        NF24_PATCH 5, nf24_11, nf24_41, nf24_42, nf24_43, nf24_44

        add     edx, nf24_51-nf24_11    ; re-anchor at nf24_51+2

        NF24_PATCH 8, nf24_51, nf24_51, nf24_52, nf24_53, nf24_54
        NF24_PATCH 9, nf24_51, nf24_61, nf24_62, nf24_63, nf24_64
        NF24_PATCH 10, nf24_51, nf24_71, nf24_72, nf24_73, nf24_74
        NF24_PATCH 11, nf24_51, nf24_81, nf24_82, nf24_83, nf24_84

        push    ebp
        push    esi                     ; [esp] = start of this opcode's data
        mov     cx, [esi]               ; colours of the left half
        mov     esi, nf_width           ; esi doubles as the row stride below
        NF24_SPREAD

        jmp     nf24_0                  ; flush prefetch
        ALIGN 4
nf24_0:
        ; --- left half, rows 1-8 ---
        NF24_BUILD nf24_11, nf24_12
        mov     [edi], eax
        add     edi, esi
        NF24_BUILD nf24_13, nf24_14
        mov     [edi], eax
        add     edi, esi

        NF24_BUILD nf24_21, nf24_22
        mov     [edi], eax
        add     edi, esi
        NF24_BUILD nf24_23, nf24_24
        mov     [edi], eax
        add     edi, esi

        NF24_BUILD nf24_31, nf24_32
        mov     [edi], eax
        add     edi, esi
        NF24_BUILD nf24_33, nf24_34
        mov     [edi], eax
        add     edi, esi

        NF24_BUILD nf24_41, nf24_42
        mov     [edi], eax
        add     edi, esi
        NF24_BUILD nf24_43, nf24_44
        mov     [edi], eax
        add     edi, esi

        lea     eax, [esi*8-4]          ; back up 8 rows, step 4 bytes right
        sub     edi, eax

        ; --- right half, rows 1-8 ---
        mov     eax, [esp]
        mov     cx, [eax+6]
        NF24_SPREAD

        NF24_BUILD nf24_51, nf24_52
        mov     [edi], eax
        add     edi, esi
        NF24_BUILD nf24_53, nf24_54
        mov     [edi], eax
        add     edi, esi

        NF24_BUILD nf24_61, nf24_62
        mov     [edi], eax
        add     edi, esi
        NF24_BUILD nf24_63, nf24_64
        mov     [edi], eax
        add     edi, esi

        NF24_BUILD nf24_71, nf24_72
        mov     [edi], eax
        add     edi, esi
        NF24_BUILD nf24_73, nf24_74
        mov     [edi], eax
        add     edi, esi

        NF24_BUILD nf24_81, nf24_82
        mov     [edi], eax
        add     edi, esi
        NF24_BUILD nf24_83, nf24_84
        mov     [edi], eax              ; last row, edi stays on it

        pop     esi
        pop     ebp
        add     esi, 12
        sub     edi, 4                  ; undo the step into the right half
        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+32
|
|
; Helper macros for nf40 (they expand to exactly the instruction
; sequences that were previously written out by hand).
;
; NF40_PATCH: take the mask byte at [esi+maskOfs], fetch its dword from
; the table at ecx and write the four table bytes into the code at the
; four given labels (edx = anchor+2, so each write lands 2 bytes into the
; labelled instruction).
NF40_PATCH MACRO maskOfs, anchor, lblA, lblB, lblC, lblD
        mov     al, [esi+maskOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(lblA-anchor)], bl
        mov     [edx+(lblB-anchor)], bh
        shr     ebx, 16
        mov     [edx+(lblC-anchor)], bl
        mov     [edx+(lblD-anchor)], bh
ENDM

; NF40_BUILD: two patched "mov ax, <reg>" instructions that assemble four
; pixels in eax; the first pair ends up in the upper 16 bits.
NF40_BUILD MACRO lblHi, lblLo
lblHi:  mov     ax, bx
        shl     eax, 16
lblLo:  mov     ax, bx
ENDM

; NF40_SPREAD: from the colour pair in cx build the register set
; bx,dx,cx,bp = 00,01,10,11 color combinations
; (note that bits are read least significant first).
NF40_SPREAD MACRO
        mov     bl, cl
        mov     bh, cl
        mov     dl, ch
        mov     dh, cl
        mov     al, ch
        mov     ah, ch
        mov     ebp, eax
ENDM

nf40:   ; 1x2 8x4x1 (12 bytes)
        ;
        ; Reached from nf24 when byte 6 of the data is above byte 7.
        ; Stream layout: two groups of (2 colour bytes, 4 mask bytes).
        ; The first group fills rows 1-4, the second rows 5-8; each mask
        ; byte covers one full 8-pixel row.
        ;
        ; Self-modifying: byte +2 of every instruction labelled nf40_RC
        ; is overwritten with a byte from nfpk_mov8[mask] before the row
        ; code runs (presumably selecting bx, dx, cx or bp as the source
        ; register -- confirm against the table's definition).
        ;
        ; In:    esi = data stream, edi = destination square
        ; Out:   esi advanced by 12
        ;        edi = edi + 7*nf_width - nfpk_back_right
        ;        (original note on nfpk_back_right: (SHEIGHT-1)*width+8)
        ; Clobb: eax, ebx, ecx, edx, flags (ebp is saved and restored)
        xor     eax, eax                ; eax = table index, upper bits stay 0
        lea     ecx, nfpk_mov8
        lea     edx, byte ptr ds:nf40_11+2

        NF40_PATCH 2, nf40_11, nf40_11, nf40_12, nf40_13, nf40_14
        NF40_PATCH 3, nf40_11, nf40_21, nf40_22, nf40_23, nf40_24
        NF40_PATCH 4, nf40_11, nf40_31, nf40_32, nf40_33, nf40_34
        NF40_PATCH 5, nf40_11, nf40_41, nf40_42, nf40_43, nf40_44

        add     edx, nf40_51-nf40_11    ; re-anchor at nf40_51+2

        NF40_PATCH 8, nf40_51, nf40_51, nf40_52, nf40_53, nf40_54
        NF40_PATCH 9, nf40_51, nf40_61, nf40_62, nf40_63, nf40_64
        NF40_PATCH 10, nf40_51, nf40_71, nf40_72, nf40_73, nf40_74
        NF40_PATCH 11, nf40_51, nf40_81, nf40_82, nf40_83, nf40_84

        push    ebp
        push    esi                     ; [esp] = start of this opcode's data
        mov     cx, [esi]               ; colours of the upper half
        mov     esi, nf_width           ; esi doubles as the row stride below
        NF40_SPREAD

        jmp     nf40_0                  ; flush prefetch
        ALIGN 4
nf40_0:
        ; --- upper half ---
        ; row 1
        NF40_BUILD nf40_11, nf40_12
        mov     [edi], eax
        NF40_BUILD nf40_13, nf40_14
        mov     [edi+4], eax
        add     edi, esi
        ; row 2
        NF40_BUILD nf40_21, nf40_22
        mov     [edi], eax
        NF40_BUILD nf40_23, nf40_24
        mov     [edi+4], eax
        add     edi, esi
        ; row 3
        NF40_BUILD nf40_31, nf40_32
        mov     [edi], eax
        NF40_BUILD nf40_33, nf40_34
        mov     [edi+4], eax
        add     edi, esi
        ; row 4
        NF40_BUILD nf40_41, nf40_42
        mov     [edi], eax
        NF40_BUILD nf40_43, nf40_44
        mov     [edi+4], eax
        add     edi, esi

        ; --- lower half ---
        mov     eax, [esp]
        mov     cx, [eax+6]
        NF40_SPREAD

        ; row 5
        NF40_BUILD nf40_51, nf40_52
        mov     [edi], eax
        NF40_BUILD nf40_53, nf40_54
        mov     [edi+4], eax
        add     edi, esi
        ; row 6
        NF40_BUILD nf40_61, nf40_62
        mov     [edi], eax
        NF40_BUILD nf40_63, nf40_64
        mov     [edi+4], eax
        add     edi, esi
        ; row 7
        NF40_BUILD nf40_71, nf40_72
        mov     [edi], eax
        NF40_BUILD nf40_73, nf40_74
        mov     [edi+4], eax
        add     edi, esi
        ; row 8 (edi stays on the last row)
        NF40_BUILD nf40_81, nf40_82
        mov     [edi], eax
        NF40_BUILD nf40_83, nf40_84
        mov     [edi+4], eax

        pop     esi
        pop     ebp
        add     esi, 12
        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf9: ; 8x8x2 (20 bytes)
; Decodes one 8x8 block whose pixels are 2-bit selectors of four colours.
; Input as read below: [esi+0..3] = four colour bytes, [esi+4..19] = 16
; pattern bytes (two per row, four 2-bit selectors per byte).
; The order of the colour bytes picks the sub-format: byte0 > byte1 goes to
; nf41, otherwise byte2 > byte3 goes to nf25, otherwise this routine runs.
; Technique: self-modifying code. Each pattern byte indexes nfpk_mov4; the
; four bytes of that table entry overwrite the second byte of four 2-byte
; "mov al/ah, bl" instructions (labels nf9_RC: R = row, C = instruction),
; so the unrolled body at nf9_0 reads each pixel from the register the
; table byte encodes (presumably bl, bh, cl or ch -- the table is defined
; elsewhere, confirm there).
; In:  esi = packed source data, edi = destination of the block's top row.
; Out: esi += 20; edi = edi + 7*nf_width - nfpk_back_right.
; Uses eax, ebx, ecx, edx.
|
|
|
|
mov eax, [esi] ; eax = the four colour bytes
|
|
cmp al, ah
|
|
ja nf41 ; colour0 > colour1 (unsigned): reduced-resolution variant
|
|
|
|
shr eax, 16 ; al/ah = colour2/colour3
|
|
cmp al, ah
|
|
ja nf25 ; colour2 > colour3 (unsigned): 2x2-cell variant
|
|
|
|
xor eax, eax ; clear upper bits so al can index the table
|
|
lea ecx, nfpk_mov4 ; ecx = table: pattern byte -> four operand bytes
|
|
lea edx, byte ptr ds:nf9_11+1 ; edx = byte to patch inside the first mov
|
|
|
|
; Row 1, first four pixels: patch nf9_11..nf9_14 from pattern byte 0.
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_11-nf9_11)], bl
|
|
mov [edx+(nf9_12-nf9_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_13-nf9_11)], bl
|
|
mov [edx+(nf9_14-nf9_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_15-nf9_11)], bl
|
|
mov [edx+(nf9_16-nf9_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_17-nf9_11)], bl
|
|
mov [edx+(nf9_18-nf9_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_21-nf9_11)], bl
|
|
mov [edx+(nf9_22-nf9_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_23-nf9_11)], bl
|
|
mov [edx+(nf9_24-nf9_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_25-nf9_11)], bl
|
|
mov [edx+(nf9_26-nf9_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_27-nf9_11)], bl
|
|
mov [edx+(nf9_28-nf9_11)], bh
|
|
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_31-nf9_11)], bl
|
|
mov [edx+(nf9_32-nf9_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_33-nf9_11)], bl
|
|
mov [edx+(nf9_34-nf9_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_35-nf9_11)], bl
|
|
mov [edx+(nf9_36-nf9_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_37-nf9_11)], bl
|
|
mov [edx+(nf9_38-nf9_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_41-nf9_11)], bl
|
|
mov [edx+(nf9_42-nf9_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_43-nf9_11)], bl
|
|
mov [edx+(nf9_44-nf9_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_45-nf9_11)], bl
|
|
mov [edx+(nf9_46-nf9_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_47-nf9_11)], bl
|
|
mov [edx+(nf9_48-nf9_11)], bh
|
|
|
|
|
|
; Rebase the patch pointer onto row 5 (presumably to keep the
; displacements below short -- confirm against the assembled encoding).
lea edx, [edx+(nf9_51-nf9_11)]
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_51-nf9_51)], bl
|
|
mov [edx+(nf9_52-nf9_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_53-nf9_51)], bl
|
|
mov [edx+(nf9_54-nf9_51)], bh
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_55-nf9_51)], bl
|
|
mov [edx+(nf9_56-nf9_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_57-nf9_51)], bl
|
|
mov [edx+(nf9_58-nf9_51)], bh
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_61-nf9_51)], bl
|
|
mov [edx+(nf9_62-nf9_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_63-nf9_51)], bl
|
|
mov [edx+(nf9_64-nf9_51)], bh
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_65-nf9_51)], bl
|
|
mov [edx+(nf9_66-nf9_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_67-nf9_51)], bl
|
|
mov [edx+(nf9_68-nf9_51)], bh
|
|
|
|
|
|
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_71-nf9_51)], bl
|
|
mov [edx+(nf9_72-nf9_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_73-nf9_51)], bl
|
|
mov [edx+(nf9_74-nf9_51)], bh
|
|
|
|
mov al, [esi+17]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_75-nf9_51)], bl
|
|
mov [edx+(nf9_76-nf9_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_77-nf9_51)], bl
|
|
mov [edx+(nf9_78-nf9_51)], bh
|
|
|
|
|
|
mov al, [esi+18]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_81-nf9_51)], bl
|
|
mov [edx+(nf9_82-nf9_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_83-nf9_51)], bl
|
|
mov [edx+(nf9_84-nf9_51)], bh
|
|
|
|
mov al, [esi+19]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_85-nf9_51)], bl
|
|
mov [edx+(nf9_86-nf9_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_87-nf9_51)], bl
|
|
mov [edx+(nf9_88-nf9_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width ; edx = destination row stride
|
|
jmp nf9_0 ; flush prefetch
|
|
ALIGN 4
|
|
; Patched body: the "bl" source operand of every labelled mov below is a
; placeholder that the code above has overwritten. Each row builds two
; dwords in eax and stores them at [edi] and [edi+4].
nf9_0:
|
|
nf9_11: mov al, bl
|
|
nf9_12: mov ah, bl
|
|
shl eax, 16
|
|
nf9_13: mov al, bl
|
|
nf9_14: mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf9_15: mov al, bl
|
|
nf9_16: mov ah, bl
|
|
shl eax, 16
|
|
nf9_17: mov al, bl
|
|
nf9_18: mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf9_21: mov al, bl
|
|
nf9_22: mov ah, bl
|
|
shl eax, 16
|
|
nf9_23: mov al, bl
|
|
nf9_24: mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf9_25: mov al, bl
|
|
nf9_26: mov ah, bl
|
|
shl eax, 16
|
|
nf9_27: mov al, bl
|
|
nf9_28: mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf9_31: mov al, bl
|
|
nf9_32: mov ah, bl
|
|
shl eax, 16
|
|
nf9_33: mov al, bl
|
|
nf9_34: mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf9_35: mov al, bl
|
|
nf9_36: mov ah, bl
|
|
shl eax, 16
|
|
nf9_37: mov al, bl
|
|
nf9_38: mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf9_41: mov al, bl
|
|
nf9_42: mov ah, bl
|
|
shl eax, 16
|
|
nf9_43: mov al, bl
|
|
nf9_44: mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf9_45: mov al, bl
|
|
nf9_46: mov ah, bl
|
|
shl eax, 16
|
|
nf9_47: mov al, bl
|
|
nf9_48: mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf9_51: mov al, bl
|
|
nf9_52: mov ah, bl
|
|
shl eax, 16
|
|
nf9_53: mov al, bl
|
|
nf9_54: mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf9_55: mov al, bl
|
|
nf9_56: mov ah, bl
|
|
shl eax, 16
|
|
nf9_57: mov al, bl
|
|
nf9_58: mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf9_61: mov al, bl
|
|
nf9_62: mov ah, bl
|
|
shl eax, 16
|
|
nf9_63: mov al, bl
|
|
nf9_64: mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf9_65: mov al, bl
|
|
nf9_66: mov ah, bl
|
|
shl eax, 16
|
|
nf9_67: mov al, bl
|
|
nf9_68: mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf9_71: mov al, bl
|
|
nf9_72: mov ah, bl
|
|
shl eax, 16
|
|
nf9_73: mov al, bl
|
|
nf9_74: mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf9_75: mov al, bl
|
|
nf9_76: mov ah, bl
|
|
shl eax, 16
|
|
nf9_77: mov al, bl
|
|
nf9_78: mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf9_81: mov al, bl
|
|
nf9_82: mov ah, bl
|
|
shl eax, 16
|
|
nf9_83: mov al, bl
|
|
nf9_84: mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf9_85: mov al, bl
|
|
nf9_86: mov ah, bl
|
|
shl eax, 16
|
|
nf9_87: mov al, bl
|
|
nf9_88: mov ah, bl
|
|
mov [edi+4], eax
|
|
|
|
add esi, 20 ; consume 4 colour bytes + 16 pattern bytes
|
|
sub edi, nfpk_back_right ; edi is 7 rows below the start at this point
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+16: variant 25 (= 9+16) of nf9, entered through "ja nf25" in nf9
|
|
nf25: ; low 4x4x2 (8 bytes)
; Reduced-resolution form of nf9, entered from nf9's "ja nf25".
; Input as read below: [esi+0..3] = four colour bytes, [esi+4..7] = four
; pattern bytes. Each pattern byte patches four instructions, and each
; patched mov fills both bytes of a 16-bit half (mov ah,X / mov al,ah) that
; is then stored to two consecutive rows, so one 2-bit selector paints a
; 2x2 cell of the 8x8 block.
; Same self-modifying scheme as nf9: nfpk_mov4[pattern byte] supplies four
; bytes written at label+1 of the nf25_RC instructions.
; NOTE(review): the table bytes go to labels in the order _x4,_x3,_x2,_x1;
; presumably this matches the al/ah destination baked into each table byte
; because the body writes ah before al -- confirm against nfpk_mov4.
; In:  esi = packed source data, edi = destination of the block's top row.
; Out: esi += 8; edi = edi + 7*nf_width - nfpk_back_right.
; Uses eax, ebx, ecx, edx.
|
|
|
|
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 8
|
|
jmp nf_solid
|
|
endif
|
|
|
|
xor eax, eax ; clear upper bits so al can index the table
|
|
lea ecx, nfpk_mov4 ; ecx = table: pattern byte -> four operand bytes
|
|
lea edx, byte ptr ds:nf25_11+1 ; edx = byte to patch inside the first mov
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_14-nf25_11)], bl
|
|
mov [edx+(nf25_13-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_12-nf25_11)], bl
|
|
mov [edx+(nf25_11-nf25_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_24-nf25_11)], bl
|
|
mov [edx+(nf25_23-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_22-nf25_11)], bl
|
|
mov [edx+(nf25_21-nf25_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_34-nf25_11)], bl
|
|
mov [edx+(nf25_33-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_32-nf25_11)], bl
|
|
mov [edx+(nf25_31-nf25_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_44-nf25_11)], bl
|
|
mov [edx+(nf25_43-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_42-nf25_11)], bl
|
|
mov [edx+(nf25_41-nf25_11)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width ; edx = destination row stride
|
|
jmp nf25_0 ; flush prefetch
|
|
ALIGN 4
|
|
; Patched body: the "bl" source of each labelled mov is a placeholder
; overwritten above. Every dword is written to two rows ([edi] and
; [edi+edx]) before edi advances by two rows.
nf25_0:
|
|
nf25_11:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_12:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
nf25_13:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_14:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf25_21:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_22:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
nf25_23:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_24:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf25_31:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_32:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
nf25_33:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_34:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf25_41:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_42:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
nf25_43:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_44:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
add edi, edx ; only one row here: edi ends 7 rows below the start
|
|
|
|
add esi, 8 ; consume 4 colour bytes + 4 pattern bytes
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+32: variant 41 (= 9+32) of nf9, entered through "ja nf41" in nf9
|
|
nf41: ; low 4x8x2 (12 bytes)
; Reduced-resolution form of nf9, entered from nf9's "ja nf41" with eax
; still holding the dword at [esi] (the four colour bytes). If colour
; byte2 > byte3 (unsigned) control passes on to nf57.
; Input as read below: [esi+0..3] = four colour bytes, [esi+4..11] = eight
; pattern bytes, one per row. Each patched mov fills both bytes of a 16-bit
; half (mov ah,X / mov al,ah), so one 2-bit selector paints two adjacent
; pixels of one row.
; Same self-modifying scheme as nf9: nfpk_mov4[pattern byte] supplies four
; bytes written at label+1 of the nf41_RC instructions.
; NOTE(review): the table bytes go to labels in the order _x4,_x3,_x2,_x1;
; presumably this matches the al/ah destination baked into each table byte
; -- confirm against nfpk_mov4.
; In:  esi = packed source data, edi = destination of the block's top row.
; Out: esi += 12; edi = edi + 7*nf_width - nfpk_back_right.
; Uses eax, ebx, ecx, edx.
|
|
shr eax, 16 ; al/ah = colour2/colour3
|
|
cmp al, ah
|
|
ja nf57 ; colour2 > colour3 (unsigned): row-doubled variant
|
|
|
|
xor eax, eax ; clear upper bits so al can index the table
|
|
lea ecx, nfpk_mov4 ; ecx = table: pattern byte -> four operand bytes
|
|
lea edx, byte ptr ds:nf41_11+1 ; edx = byte to patch inside the first mov
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_14-nf41_11)], bl
|
|
mov [edx+(nf41_13-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_12-nf41_11)], bl
|
|
mov [edx+(nf41_11-nf41_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_24-nf41_11)], bl
|
|
mov [edx+(nf41_23-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_22-nf41_11)], bl
|
|
mov [edx+(nf41_21-nf41_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_34-nf41_11)], bl
|
|
mov [edx+(nf41_33-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_32-nf41_11)], bl
|
|
mov [edx+(nf41_31-nf41_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_44-nf41_11)], bl
|
|
mov [edx+(nf41_43-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_42-nf41_11)], bl
|
|
mov [edx+(nf41_41-nf41_11)], bh
|
|
|
|
; Rebase the patch pointer onto row 5 (presumably to keep the
; displacements below short -- confirm against the assembled encoding).
lea edx, [edx+(nf41_51-nf41_11)]
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_54-nf41_51)], bl
|
|
mov [edx+(nf41_53-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_52-nf41_51)], bl
|
|
mov [edx+(nf41_51-nf41_51)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_64-nf41_51)], bl
|
|
mov [edx+(nf41_63-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_62-nf41_51)], bl
|
|
mov [edx+(nf41_61-nf41_51)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_74-nf41_51)], bl
|
|
mov [edx+(nf41_73-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_72-nf41_51)], bl
|
|
mov [edx+(nf41_71-nf41_51)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_84-nf41_51)], bl
|
|
mov [edx+(nf41_83-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_82-nf41_51)], bl
|
|
mov [edx+(nf41_81-nf41_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width ; edx = destination row stride
|
|
jmp nf41_0 ; flush prefetch
|
|
ALIGN 4
|
|
; Patched body: the "bl" source of each labelled mov is a placeholder
; overwritten above. One row (two dword stores) per group of four labels.
nf41_0:
|
|
nf41_11:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_12:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_13:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_14:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_21:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_22:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_23:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_24:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_31:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_32:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_33:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_34:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_41:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_42:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_43:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_44:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_51:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_52:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_53:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_54:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_61:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_62:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_63:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_64:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_71:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_72:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_73:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_74:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_81:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_82:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf41_83:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf41_84:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
|
|
add esi, 12 ; consume 4 colour bytes + 8 pattern bytes
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+48: variant 57 (= 9+48) of nf9, entered through "ja nf57" in nf41
|
|
nf57: ; low 8x4x2 (12 bytes)
; Reduced-resolution form of nf9, entered from nf41's "ja nf57".
; Input as read below: [esi+0..3] = four colour bytes, [esi+4..11] = eight
; pattern bytes (two per pattern row). Each pattern row is stored to two
; consecutive destination rows ([edi] and [edi+edx]), so four pattern rows
; fill the eight rows of the block.
; Same self-modifying scheme as nf9: nfpk_mov4[pattern byte] supplies four
; bytes written at label+1 of the nf57_RC instructions.
; In:  esi = packed source data, edi = destination of the block's top row.
; Out: esi += 12; edi = edi + 7*nf_width - nfpk_back_right.
; Uses eax, ebx, ecx, edx.
|
|
xor eax, eax ; clear upper bits so al can index the table
|
|
lea ecx, nfpk_mov4 ; ecx = table: pattern byte -> four operand bytes
|
|
lea edx, byte ptr ds:nf57_11+1 ; edx = byte to patch inside the first mov
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_11-nf57_11)], bl
|
|
mov [edx+(nf57_12-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_13-nf57_11)], bl
|
|
mov [edx+(nf57_14-nf57_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_15-nf57_11)], bl
|
|
mov [edx+(nf57_16-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_17-nf57_11)], bl
|
|
mov [edx+(nf57_18-nf57_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_21-nf57_11)], bl
|
|
mov [edx+(nf57_22-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_23-nf57_11)], bl
|
|
mov [edx+(nf57_24-nf57_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_25-nf57_11)], bl
|
|
mov [edx+(nf57_26-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_27-nf57_11)], bl
|
|
mov [edx+(nf57_28-nf57_11)], bh
|
|
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_31-nf57_11)], bl
|
|
mov [edx+(nf57_32-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_33-nf57_11)], bl
|
|
mov [edx+(nf57_34-nf57_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_35-nf57_11)], bl
|
|
mov [edx+(nf57_36-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_37-nf57_11)], bl
|
|
mov [edx+(nf57_38-nf57_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_41-nf57_11)], bl
|
|
mov [edx+(nf57_42-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_43-nf57_11)], bl
|
|
mov [edx+(nf57_44-nf57_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_45-nf57_11)], bl
|
|
mov [edx+(nf57_46-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_47-nf57_11)], bl
|
|
mov [edx+(nf57_48-nf57_11)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width ; edx = destination row stride
|
|
jmp nf57_0 ; flush prefetch
|
|
ALIGN 4
|
|
; Patched body: the "bl" source of each labelled mov is a placeholder
; overwritten above. Every dword is written to two rows before edi
; advances by two rows.
nf57_0:
|
|
nf57_11:mov al, bl
|
|
nf57_12:mov ah, bl
|
|
shl eax, 16
|
|
nf57_13:mov al, bl
|
|
nf57_14:mov ah, bl
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
nf57_15:mov al, bl
|
|
nf57_16:mov ah, bl
|
|
shl eax, 16
|
|
nf57_17:mov al, bl
|
|
nf57_18:mov ah, bl
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf57_21:mov al, bl
|
|
nf57_22:mov ah, bl
|
|
shl eax, 16
|
|
nf57_23:mov al, bl
|
|
nf57_24:mov ah, bl
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
nf57_25:mov al, bl
|
|
nf57_26:mov ah, bl
|
|
shl eax, 16
|
|
nf57_27:mov al, bl
|
|
nf57_28:mov ah, bl
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf57_31:mov al, bl
|
|
nf57_32:mov ah, bl
|
|
shl eax, 16
|
|
nf57_33:mov al, bl
|
|
nf57_34:mov ah, bl
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
nf57_35:mov al, bl
|
|
nf57_36:mov ah, bl
|
|
shl eax, 16
|
|
nf57_37:mov al, bl
|
|
nf57_38:mov ah, bl
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf57_41:mov al, bl
|
|
nf57_42:mov ah, bl
|
|
shl eax, 16
|
|
nf57_43:mov al, bl
|
|
nf57_44:mov ah, bl
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
nf57_45:mov al, bl
|
|
nf57_46:mov ah, bl
|
|
shl eax, 16
|
|
nf57_47:mov al, bl
|
|
nf57_48:mov ah, bl
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
add edi, edx ; only one row here: edi ends 7 rows below the start
|
|
|
|
add esi, 12 ; consume 4 colour bytes + 8 pattern bytes
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf10: ; 2x2 4x4x2 (32 bytes)
; Decodes one 8x8 block as four 4x4 quadrants, each with its own four
; colours and 2-bit selectors. If the first colour byte is greater than the
; second (unsigned) control passes to nf26 instead.
; Input as read below, 8 bytes per quadrant (4 colour bytes followed by
; 4 pattern bytes, one pattern byte per 4-pixel row):
;   [esi+0..7]   rows 1-4 of the left column  (labels nf10_11..nf10_28)
;   [esi+8..15]  rows 5-8 of the left column  (labels nf10_31..nf10_48)
;   [esi+16..23] rows 1-4 of the right column (labels nf10_51..nf10_68)
;   [esi+24..31] rows 5-8 of the right column (labels nf10_71..nf10_88)
; Same self-modifying scheme as nf9: nfpk_mov4[pattern byte] supplies four
; bytes written at label+1 of four consecutive labelled movs.
; In:  esi = packed source data, edi = destination of the block's top row.
; Out: esi += 32; edi = edi + 7*nf_width - nfpk_back_right.
; Uses eax, ebx, ecx, edx.
|
|
|
|
mov ax, [esi]
|
|
cmp al, ah
|
|
ja nf26 ; colour0 > colour1 (unsigned): two-half variants
|
|
|
|
xor eax, eax ; clear upper bits so al can index the table
|
|
lea ecx, nfpk_mov4 ; ecx = table: pattern byte -> four operand bytes
|
|
lea edx, byte ptr ds:nf10_11+1 ; edx = byte to patch inside the first mov
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_11-nf10_11)], bl
|
|
mov [edx+(nf10_12-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_13-nf10_11)], bl
|
|
mov [edx+(nf10_14-nf10_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_15-nf10_11)], bl
|
|
mov [edx+(nf10_16-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_17-nf10_11)], bl
|
|
mov [edx+(nf10_18-nf10_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_21-nf10_11)], bl
|
|
mov [edx+(nf10_22-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_23-nf10_11)], bl
|
|
mov [edx+(nf10_24-nf10_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_25-nf10_11)], bl
|
|
mov [edx+(nf10_26-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_27-nf10_11)], bl
|
|
mov [edx+(nf10_28-nf10_11)], bh
|
|
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_31-nf10_11)], bl
|
|
mov [edx+(nf10_32-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_33-nf10_11)], bl
|
|
mov [edx+(nf10_34-nf10_11)], bh
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_35-nf10_11)], bl
|
|
mov [edx+(nf10_36-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_37-nf10_11)], bl
|
|
mov [edx+(nf10_38-nf10_11)], bh
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_41-nf10_11)], bl
|
|
mov [edx+(nf10_42-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_43-nf10_11)], bl
|
|
mov [edx+(nf10_44-nf10_11)], bh
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_45-nf10_11)], bl
|
|
mov [edx+(nf10_46-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_47-nf10_11)], bl
|
|
mov [edx+(nf10_48-nf10_11)], bh
|
|
|
|
|
|
; Rebase the patch pointer onto nf10_51 (presumably to keep the
; displacements below short -- confirm against the assembled encoding).
lea edx, [edx+(nf10_51-nf10_11)]
|
|
|
|
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_51-nf10_51)], bl
|
|
mov [edx+(nf10_52-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_53-nf10_51)], bl
|
|
mov [edx+(nf10_54-nf10_51)], bh
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_55-nf10_51)], bl
|
|
mov [edx+(nf10_56-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_57-nf10_51)], bl
|
|
mov [edx+(nf10_58-nf10_51)], bh
|
|
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_61-nf10_51)], bl
|
|
mov [edx+(nf10_62-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_63-nf10_51)], bl
|
|
mov [edx+(nf10_64-nf10_51)], bh
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_65-nf10_51)], bl
|
|
mov [edx+(nf10_66-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_67-nf10_51)], bl
|
|
mov [edx+(nf10_68-nf10_51)], bh
|
|
|
|
|
|
mov al, [esi+28]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_71-nf10_51)], bl
|
|
mov [edx+(nf10_72-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_73-nf10_51)], bl
|
|
mov [edx+(nf10_74-nf10_51)], bh
|
|
|
|
mov al, [esi+29]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_75-nf10_51)], bl
|
|
mov [edx+(nf10_76-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_77-nf10_51)], bl
|
|
mov [edx+(nf10_78-nf10_51)], bh
|
|
|
|
|
|
mov al, [esi+30]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_81-nf10_51)], bl
|
|
mov [edx+(nf10_82-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_83-nf10_51)], bl
|
|
mov [edx+(nf10_84-nf10_51)], bh
|
|
|
|
mov al, [esi+31]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_85-nf10_51)], bl
|
|
mov [edx+(nf10_86-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_87-nf10_51)], bl
|
|
mov [edx+(nf10_88-nf10_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width ; edx = destination row stride
|
|
jmp nf10_0 ; flush prefetch
|
|
ALIGN 4
|
|
; Patched body: the "bl" source of each labelled mov is a placeholder
; overwritten above. Each group of four labels produces one 4-byte row
; stored at [edi]; the left column is written first, then the right one.
nf10_0:
|
|
nf10_11:mov al, bl
|
|
nf10_12:mov ah, bl
|
|
shl eax, 16
|
|
nf10_13:mov al, bl
|
|
nf10_14:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_15:mov al, bl
|
|
nf10_16:mov ah, bl
|
|
shl eax, 16
|
|
nf10_17:mov al, bl
|
|
nf10_18:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_21:mov al, bl
|
|
nf10_22:mov ah, bl
|
|
shl eax, 16
|
|
nf10_23:mov al, bl
|
|
nf10_24:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_25:mov al, bl
|
|
nf10_26:mov ah, bl
|
|
shl eax, 16
|
|
nf10_27:mov al, bl
|
|
nf10_28:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+8]
|
|
mov cx, [esi+10]
|
|
|
|
nf10_31:mov al, bl
|
|
nf10_32:mov ah, bl
|
|
shl eax, 16
|
|
nf10_33:mov al, bl
|
|
nf10_34:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_35:mov al, bl
|
|
nf10_36:mov ah, bl
|
|
shl eax, 16
|
|
nf10_37:mov al, bl
|
|
nf10_38:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_41:mov al, bl
|
|
nf10_42:mov ah, bl
|
|
shl eax, 16
|
|
nf10_43:mov al, bl
|
|
nf10_44:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_45:mov al, bl
|
|
nf10_46:mov ah, bl
|
|
shl eax, 16
|
|
nf10_47:mov al, bl
|
|
nf10_48:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
; Left column done (edi advanced 8 rows): go back up 8 rows and 4 bytes
; to the right, i.e. to the top of the right column.
lea eax, [edx*8-4]
|
|
sub edi, eax
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+16]
|
|
mov cx, [esi+18]
|
|
|
|
nf10_51:mov al, bl
|
|
nf10_52:mov ah, bl
|
|
shl eax, 16
|
|
nf10_53:mov al, bl
|
|
nf10_54:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_55:mov al, bl
|
|
nf10_56:mov ah, bl
|
|
shl eax, 16
|
|
nf10_57:mov al, bl
|
|
nf10_58:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_61:mov al, bl
|
|
nf10_62:mov ah, bl
|
|
shl eax, 16
|
|
nf10_63:mov al, bl
|
|
nf10_64:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_65:mov al, bl
|
|
nf10_66:mov ah, bl
|
|
shl eax, 16
|
|
nf10_67:mov al, bl
|
|
nf10_68:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+24]
|
|
mov cx, [esi+26]
|
|
|
|
nf10_71:mov al, bl
|
|
nf10_72:mov ah, bl
|
|
shl eax, 16
|
|
nf10_73:mov al, bl
|
|
nf10_74:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_75:mov al, bl
|
|
nf10_76:mov ah, bl
|
|
shl eax, 16
|
|
nf10_77:mov al, bl
|
|
nf10_78:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_81:mov al, bl
|
|
nf10_82:mov ah, bl
|
|
shl eax, 16
|
|
nf10_83:mov al, bl
|
|
nf10_84:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_85:mov al, bl
|
|
nf10_86:mov ah, bl
|
|
shl eax, 16
|
|
nf10_87:mov al, bl
|
|
nf10_88:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
add esi, 32 ; consume four 8-byte quadrant records
|
|
sub edi, 4 ; undo the +4 offset of the right column
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+16: variant 26 (= 10+16) of nf10, entered through "ja nf26" in nf10
|
|
nf26: ; 2x1 4x8x2 (24 bytes)
; Variant of nf10, entered from nf10's "ja nf26": the 8x8 block is split
; into a left and a right 4x8 half, each with its own four colours. If
; byte [esi+12] is greater than byte [esi+13] (unsigned) control passes on
; to nf42.
; Input as read below, 12 bytes per half (4 colour bytes followed by
; 8 pattern bytes, one pattern byte per 4-pixel row):
;   [esi+0..11]  left half  (labels nf26_11..nf26_48)
;   [esi+12..23] right half (labels nf26_51..nf26_88)
; Same self-modifying scheme as nf9: nfpk_mov4[pattern byte] supplies four
; bytes written at label+1 of four consecutive labelled movs.
; In:  esi = packed source data, edi = destination of the block's top row.
; Out: esi += 24; edi = edi + 7*nf_width - nfpk_back_right.
; Uses eax, ebx, ecx, edx.
|
|
|
|
mov ax, [esi+12] ; first two colour bytes of the second half
|
|
cmp al, ah
|
|
ja nf42 ; byte12 > byte13 (unsigned): top/bottom split
|
|
|
|
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 24
|
|
jmp nf_solid
|
|
endif
|
|
|
|
xor eax, eax ; clear upper bits so al can index the table
|
|
lea ecx, nfpk_mov4 ; ecx = table: pattern byte -> four operand bytes
|
|
lea edx, byte ptr ds:nf26_11+1 ; edx = byte to patch inside the first mov
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_11-nf26_11)], bl
|
|
mov [edx+(nf26_12-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_13-nf26_11)], bl
|
|
mov [edx+(nf26_14-nf26_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_15-nf26_11)], bl
|
|
mov [edx+(nf26_16-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_17-nf26_11)], bl
|
|
mov [edx+(nf26_18-nf26_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_21-nf26_11)], bl
|
|
mov [edx+(nf26_22-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_23-nf26_11)], bl
|
|
mov [edx+(nf26_24-nf26_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_25-nf26_11)], bl
|
|
mov [edx+(nf26_26-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_27-nf26_11)], bl
|
|
mov [edx+(nf26_28-nf26_11)], bh
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_31-nf26_11)], bl
|
|
mov [edx+(nf26_32-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_33-nf26_11)], bl
|
|
mov [edx+(nf26_34-nf26_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_35-nf26_11)], bl
|
|
mov [edx+(nf26_36-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_37-nf26_11)], bl
|
|
mov [edx+(nf26_38-nf26_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_41-nf26_11)], bl
|
|
mov [edx+(nf26_42-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_43-nf26_11)], bl
|
|
mov [edx+(nf26_44-nf26_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_45-nf26_11)], bl
|
|
mov [edx+(nf26_46-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_47-nf26_11)], bl
|
|
mov [edx+(nf26_48-nf26_11)], bh
|
|
|
|
|
|
; Rebase the patch pointer onto nf26_51 (presumably to keep the
; displacements below short -- confirm against the assembled encoding).
lea edx, [edx+(nf26_51-nf26_11)]
|
|
|
|
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_51-nf26_51)], bl
|
|
mov [edx+(nf26_52-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_53-nf26_51)], bl
|
|
mov [edx+(nf26_54-nf26_51)], bh
|
|
|
|
mov al, [esi+17]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_55-nf26_51)], bl
|
|
mov [edx+(nf26_56-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_57-nf26_51)], bl
|
|
mov [edx+(nf26_58-nf26_51)], bh
|
|
|
|
|
|
mov al, [esi+18]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_61-nf26_51)], bl
|
|
mov [edx+(nf26_62-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_63-nf26_51)], bl
|
|
mov [edx+(nf26_64-nf26_51)], bh
|
|
|
|
mov al, [esi+19]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_65-nf26_51)], bl
|
|
mov [edx+(nf26_66-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_67-nf26_51)], bl
|
|
mov [edx+(nf26_68-nf26_51)], bh
|
|
|
|
|
|
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_71-nf26_51)], bl
|
|
mov [edx+(nf26_72-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_73-nf26_51)], bl
|
|
mov [edx+(nf26_74-nf26_51)], bh
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_75-nf26_51)], bl
|
|
mov [edx+(nf26_76-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_77-nf26_51)], bl
|
|
mov [edx+(nf26_78-nf26_51)], bh
|
|
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_81-nf26_51)], bl
|
|
mov [edx+(nf26_82-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_83-nf26_51)], bl
|
|
mov [edx+(nf26_84-nf26_51)], bh
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_85-nf26_51)], bl
|
|
mov [edx+(nf26_86-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_87-nf26_51)], bl
|
|
mov [edx+(nf26_88-nf26_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width ; edx = destination row stride
|
|
jmp nf26_0 ; flush prefetch
|
|
ALIGN 4
|
|
; Patched body: the "bl" source of each labelled mov is a placeholder
; overwritten above. Each group of four labels produces one 4-byte row
; stored at [edi]; the left half is written first, then the right one.
nf26_0:
|
|
nf26_11:mov al, bl
|
|
nf26_12:mov ah, bl
|
|
shl eax, 16
|
|
nf26_13:mov al, bl
|
|
nf26_14:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_15:mov al, bl
|
|
nf26_16:mov ah, bl
|
|
shl eax, 16
|
|
nf26_17:mov al, bl
|
|
nf26_18:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_21:mov al, bl
|
|
nf26_22:mov ah, bl
|
|
shl eax, 16
|
|
nf26_23:mov al, bl
|
|
nf26_24:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_25:mov al, bl
|
|
nf26_26:mov ah, bl
|
|
shl eax, 16
|
|
nf26_27:mov al, bl
|
|
nf26_28:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_31:mov al, bl
|
|
nf26_32:mov ah, bl
|
|
shl eax, 16
|
|
nf26_33:mov al, bl
|
|
nf26_34:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_35:mov al, bl
|
|
nf26_36:mov ah, bl
|
|
shl eax, 16
|
|
nf26_37:mov al, bl
|
|
nf26_38:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_41:mov al, bl
|
|
nf26_42:mov ah, bl
|
|
shl eax, 16
|
|
nf26_43:mov al, bl
|
|
nf26_44:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_45:mov al, bl
|
|
nf26_46:mov ah, bl
|
|
shl eax, 16
|
|
nf26_47:mov al, bl
|
|
nf26_48:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
; Left half done (edi advanced 8 rows): go back up 8 rows and 4 bytes
; to the right, i.e. to the top of the right half.
lea eax, [edx*8-4]
|
|
sub edi, eax
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+12]
|
|
mov cx, [esi+14]
|
|
|
|
nf26_51:mov al, bl
|
|
nf26_52:mov ah, bl
|
|
shl eax, 16
|
|
nf26_53:mov al, bl
|
|
nf26_54:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_55:mov al, bl
|
|
nf26_56:mov ah, bl
|
|
shl eax, 16
|
|
nf26_57:mov al, bl
|
|
nf26_58:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_61:mov al, bl
|
|
nf26_62:mov ah, bl
|
|
shl eax, 16
|
|
nf26_63:mov al, bl
|
|
nf26_64:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_65:mov al, bl
|
|
nf26_66:mov ah, bl
|
|
shl eax, 16
|
|
nf26_67:mov al, bl
|
|
nf26_68:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_71:mov al, bl
|
|
nf26_72:mov ah, bl
|
|
shl eax, 16
|
|
nf26_73:mov al, bl
|
|
nf26_74:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_75:mov al, bl
|
|
nf26_76:mov ah, bl
|
|
shl eax, 16
|
|
nf26_77:mov al, bl
|
|
nf26_78:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_81:mov al, bl
|
|
nf26_82:mov ah, bl
|
|
shl eax, 16
|
|
nf26_83:mov al, bl
|
|
nf26_84:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_85:mov al, bl
|
|
nf26_86:mov ah, bl
|
|
shl eax, 16
|
|
nf26_87:mov al, bl
|
|
nf26_88:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
add esi, 24 ; consume two 12-byte half records
|
|
sub edi, 4 ; undo the +4 offset of the right half
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+32: variant 42 (= 10+32) of nf10, entered through "ja nf42" in nf26
|
|
nf42: ; 1x2 8x4x2 (24 bytes)
; Variant of nf10, entered from nf26's "ja nf42": the 8x8 block is split
; into a top and a bottom 8x4 half, each with its own four colours.
; Input as read below, 12 bytes per half (4 colour bytes followed by
; 8 pattern bytes, two pattern bytes per 8-pixel row):
;   [esi+0..11]  top half    (labels nf42_11..nf42_48)
;   [esi+12..23] bottom half (labels nf42_51..nf42_88)
; Same self-modifying scheme as nf9: nfpk_mov4[pattern byte] supplies four
; bytes written at label+1 of four consecutive labelled movs.
; In:  esi = packed source data, edi = destination of the block's top row.
; Out: esi += 24; edi = edi + 7*nf_width - nfpk_back_right.
; Uses eax, ebx, ecx, edx.
|
|
|
|
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 24
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax ; clear upper bits so al can index the table
|
|
lea ecx, nfpk_mov4 ; ecx = table: pattern byte -> four operand bytes
|
|
lea edx, byte ptr ds:nf42_11+1 ; edx = byte to patch inside the first mov
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_11-nf42_11)], bl
|
|
mov [edx+(nf42_12-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_13-nf42_11)], bl
|
|
mov [edx+(nf42_14-nf42_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_15-nf42_11)], bl
|
|
mov [edx+(nf42_16-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_17-nf42_11)], bl
|
|
mov [edx+(nf42_18-nf42_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_21-nf42_11)], bl
|
|
mov [edx+(nf42_22-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_23-nf42_11)], bl
|
|
mov [edx+(nf42_24-nf42_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_25-nf42_11)], bl
|
|
mov [edx+(nf42_26-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_27-nf42_11)], bl
|
|
mov [edx+(nf42_28-nf42_11)], bh
|
|
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_31-nf42_11)], bl
|
|
mov [edx+(nf42_32-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_33-nf42_11)], bl
|
|
mov [edx+(nf42_34-nf42_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_35-nf42_11)], bl
|
|
mov [edx+(nf42_36-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_37-nf42_11)], bl
|
|
mov [edx+(nf42_38-nf42_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_41-nf42_11)], bl
|
|
mov [edx+(nf42_42-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_43-nf42_11)], bl
|
|
mov [edx+(nf42_44-nf42_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_45-nf42_11)], bl
|
|
mov [edx+(nf42_46-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_47-nf42_11)], bl
|
|
mov [edx+(nf42_48-nf42_11)], bh
|
|
|
|
|
|
; Rebase the patch pointer onto nf42_51 (presumably to keep the
; displacements below short -- confirm against the assembled encoding).
lea edx, [edx+(nf42_51-nf42_11)]
|
|
|
|
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_51-nf42_51)], bl
|
|
mov [edx+(nf42_52-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_53-nf42_51)], bl
|
|
mov [edx+(nf42_54-nf42_51)], bh
|
|
|
|
mov al, [esi+17]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_55-nf42_51)], bl
|
|
mov [edx+(nf42_56-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_57-nf42_51)], bl
|
|
mov [edx+(nf42_58-nf42_51)], bh
|
|
|
|
|
|
mov al, [esi+18]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_61-nf42_51)], bl
|
|
mov [edx+(nf42_62-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_63-nf42_51)], bl
|
|
mov [edx+(nf42_64-nf42_51)], bh
|
|
|
|
mov al, [esi+19]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_65-nf42_51)], bl
|
|
mov [edx+(nf42_66-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_67-nf42_51)], bl
|
|
mov [edx+(nf42_68-nf42_51)], bh
|
|
|
|
|
|
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_71-nf42_51)], bl
|
|
mov [edx+(nf42_72-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_73-nf42_51)], bl
|
|
mov [edx+(nf42_74-nf42_51)], bh
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_75-nf42_51)], bl
|
|
mov [edx+(nf42_76-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_77-nf42_51)], bl
|
|
mov [edx+(nf42_78-nf42_51)], bh
|
|
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_81-nf42_51)], bl
|
|
mov [edx+(nf42_82-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_83-nf42_51)], bl
|
|
mov [edx+(nf42_84-nf42_51)], bh
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_85-nf42_51)], bl
|
|
mov [edx+(nf42_86-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_87-nf42_51)], bl
|
|
mov [edx+(nf42_88-nf42_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width ; edx = destination row stride
|
|
jmp nf42_0 ; flush prefetch
|
|
ALIGN 4
|
|
; Patched body: the "bl" source of each labelled mov is a placeholder
; overwritten above. Each row builds two dwords stored at [edi] and
; [edi+4]; the colour registers are reloaded after the fourth row.
nf42_0:
|
|
nf42_11:mov al, bl
|
|
nf42_12:mov ah, bl
|
|
shl eax, 16
|
|
nf42_13:mov al, bl
|
|
nf42_14:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_15:mov al, bl
|
|
nf42_16:mov ah, bl
|
|
shl eax, 16
|
|
nf42_17:mov al, bl
|
|
nf42_18:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_21:mov al, bl
|
|
nf42_22:mov ah, bl
|
|
shl eax, 16
|
|
nf42_23:mov al, bl
|
|
nf42_24:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_25:mov al, bl
|
|
nf42_26:mov ah, bl
|
|
shl eax, 16
|
|
nf42_27:mov al, bl
|
|
nf42_28:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_31:mov al, bl
|
|
nf42_32:mov ah, bl
|
|
shl eax, 16
|
|
nf42_33:mov al, bl
|
|
nf42_34:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_35:mov al, bl
|
|
nf42_36:mov ah, bl
|
|
shl eax, 16
|
|
nf42_37:mov al, bl
|
|
nf42_38:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_41:mov al, bl
|
|
nf42_42:mov ah, bl
|
|
shl eax, 16
|
|
nf42_43:mov al, bl
|
|
nf42_44:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_45:mov al, bl
|
|
nf42_46:mov ah, bl
|
|
shl eax, 16
|
|
nf42_47:mov al, bl
|
|
nf42_48:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+12]
|
|
mov cx, [esi+14]
|
|
|
|
nf42_51:mov al, bl
|
|
nf42_52:mov ah, bl
|
|
shl eax, 16
|
|
nf42_53:mov al, bl
|
|
nf42_54:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_55:mov al, bl
|
|
nf42_56:mov ah, bl
|
|
shl eax, 16
|
|
nf42_57:mov al, bl
|
|
nf42_58:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_61:mov al, bl
|
|
nf42_62:mov ah, bl
|
|
shl eax, 16
|
|
nf42_63:mov al, bl
|
|
nf42_64:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_65:mov al, bl
|
|
nf42_66:mov ah, bl
|
|
shl eax, 16
|
|
nf42_67:mov al, bl
|
|
nf42_68:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_71:mov al, bl
|
|
nf42_72:mov ah, bl
|
|
shl eax, 16
|
|
nf42_73:mov al, bl
|
|
nf42_74:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_75:mov al, bl
|
|
nf42_76:mov ah, bl
|
|
shl eax, 16
|
|
nf42_77:mov al, bl
|
|
nf42_78:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_81:mov al, bl
|
|
nf42_82:mov ah, bl
|
|
shl eax, 16
|
|
nf42_83:mov al, bl
|
|
nf42_84:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_85:mov al, bl
|
|
nf42_86:mov ah, bl
|
|
shl eax, 16
|
|
nf42_87:mov al, bl
|
|
nf42_88:mov ah, bl
|
|
mov [edi+4], eax
|
|
|
|
add esi, 24 ; consume two 12-byte half records
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf11:                                   ; 8x8x8 (64 bytes)
;
; Raw block: the next 64 stream bytes are copied verbatim, 8 bytes per
; destination row.
;   In:    esi = compressed data, edi = top-left of the destination block
;   Out:   esi advanced by 64, edi adjusted by -nfpk_back_right
;   Clobb: eax, edx, flags
;
  if 0  ;debug
        add     esi, 64
        mov     eax, 0fefefefeH
;       mov     ebx, eax
        mov     ebx, 0
        jmp     nf_solid
  endif
        mov     edx, nf_width           ; edx = destination row pitch

nf11_src = 0                            ; assembly-time source offset of the current row
  REPEAT SHEIGHT-1                      ; rows 0..6: copy 8 bytes, then step down one row
        mov     eax, [esi+nf11_src]
        mov     [edi], eax
        mov     eax, [esi+nf11_src+4]
        mov     [edi+4], eax
        add     edi, edx
nf11_src = nf11_src + SWIDTH
  ENDM
        mov     eax, [esi+nf11_src]     ; row 7: copy only, edi stays on the last row
        mov     [edi], eax
        mov     eax, [esi+nf11_src+4]
        mov     [edi+4], eax

        add     esi, 64                 ; consume the 64 literal bytes
        sub     edi, nfpk_back_right    ; original note: (SHEIGHT-1)*width+8
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf12:                                   ; low 4x4x8 (16 bytes)
;
; Low-resolution raw block: 16 stream bytes, each expanded to a 2x2 square
; of identical pixels.  Every source dword (4 bytes) fills two destination
; rows of 8 pixels.
;   In:    esi = compressed data, edi = top-left of the destination block
;   Out:   esi advanced by 16, edi adjusted by -nfpk_back_right
;   Clobb: eax, ebx, edx, flags
;

; Build ebx = al,al,ah,ah (low byte first) and store it to two operands.
NF12_EXPAND MACRO dstUpper, dstLower
        mov     bl, ah
        mov     bh, ah
        shl     ebx, 16                 ; high word = second pixel, doubled
        mov     bl, al
        mov     bh, al                  ; low word  = first pixel, doubled
        mov     dstUpper, ebx
        mov     dstLower, ebx
        ENDM

        mov     edx, nf_width           ; edx = destination row pitch

nf12_src = 0                            ; assembly-time offset of the current source dword
  REPEAT 4
        mov     eax, [esi+nf12_src]     ; four low-res pixels
        NF12_EXPAND <[edi]>, <[edi+edx]>
        shr     eax, 16                 ; bring pixels 2 and 3 into al/ah
        NF12_EXPAND <[edi+4]>, <[edi+edx+4]>
    IF nf12_src LT 12
        lea     edi, [edi+edx*2]        ; next pair of destination rows
    ELSE
        add     edi, edx                ; last pair: leave edi on the final row
    ENDIF
nf12_src = nf12_src + 4
  ENDM

        sub     edi, nfpk_back_right
        add     esi, 16
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf13:                                   ; 2x2 4x4x0 (4 bytes)
;
; Four solid 4x4 quadrants, one color byte each.  Bytes 0 and 1 fill the
; left and right halves of rows 0-3; bytes 2 and 3 fill rows 4-7.
;   In:    esi = compressed data, edi = top-left of the destination block
;   Out:   esi advanced by 4, edi adjusted by -nfpk_back_right
;   Clobb: eax, ebx, ecx, edx, flags
;

; Replicate the byte at [esi+srcOfs] into all four bytes of dst32.
NF13_SPLAT MACRO dst32, dst16, srcOfs
        mov     cl, [esi+srcOfs]
        mov     ch, cl
        mov     dst32, ecx
        shl     dst32, 16
        mov     dst16, cx
        ENDM

; Write eax|ebx to the rows at edi and edi+edx, then advance two rows.
NF13_ROWPAIR MACRO
        mov     [edi], eax
        mov     [edi+4], ebx
        mov     [edi+edx], eax
        mov     [edi+edx+4], ebx
        lea     edi, [edi+edx*2]
        ENDM

        mov     edx, nf_width           ; edx = destination row pitch

        NF13_SPLAT eax, ax, 0           ; left  color, rows 0-3
        NF13_SPLAT ebx, bx, 1           ; right color, rows 0-3
        NF13_ROWPAIR                    ; rows 0,1
        NF13_ROWPAIR                    ; rows 2,3

        NF13_SPLAT eax, ax, 2           ; left  color, rows 4-7
        NF13_SPLAT ebx, bx, 3           ; right color, rows 4-7
        NF13_ROWPAIR                    ; rows 4,5
        mov     [edi], eax              ; row 6
        mov     [edi+4], ebx
        add     edi, edx
        mov     [edi], eax              ; row 7 (edi is left on this row)
        mov     [edi+4], ebx

        sub     edi, nfpk_back_right
        add     esi, 4

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf14:                                   ; 8x8x0 (1 byte)
;
; Solid block: one color byte is replicated into eax and ebx, and the fill
; itself is done by nf_solid (which stores eax to even rows, ebx to odd rows).
;   In:    esi = compressed data, edi = top-left of the destination block
;   Out:   esi advanced by 1; edi is adjusted by nf_solid
;   Clobb: eax, ebx, flags (plus whatever nf_solid clobbers)
;
; The handler leaves through a tail jump: nf_solid executes the retn.  The
; unreachable retn that used to follow the jmp has been dropped.
;
  if 0  ;debug
        jmp     nf0
  endif
        mov     bl, [esi]               ; bl = fill color
        inc     esi
        mov     bh, bl
        mov     eax, ebx
        shl     eax, 16
        mov     ax, bx                  ; eax = color in all four bytes
        mov     ebx, eax                ; same pattern for the odd rows
  if 0  ;debug
        mov     eax, 080808080h
        mov     ebx, eax
  endif
        jmp     nf_solid                ; tail jump, never returns here
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf15:                                   ; mix 8x8x0 (2 bytes)
;
; Two-color checkerboard.  eax becomes c0,c1,c0,c1 (low byte first) and ebx
; the same pattern rotated by one pixel, then control falls into nf_solid.
;   In:    esi = compressed data, edi = top-left of the destination block
;   Out:   esi advanced by 2, edi adjusted by -nfpk_back_right
;   Clobb: eax, ebx, edx, flags
;
  if 0  ;debug
        inc     esi
        jmp     nf0
  endif
        mov     bx, [esi]               ; bl = c0, bh = c1
        add     esi, 2
        mov     ax, bx
        shl     eax, 16
        mov     ax, bx                  ; eax = c0,c1,c0,c1
        mov     ebx, eax
        rol     ebx, 8                  ; ebx = c1,c0,c1,c0
  if 0  ;debug
        mov     eax, 080808080h
        mov     ebx, eax
  endif

; nf_solid: shared fill tail (also entered by jmp from other handlers).
;   In:  eax = 4-pixel pattern for rows 0,2,4,6
;        ebx = 4-pixel pattern for rows 1,3,5,7
nf_solid:
        mov     edx, nf_width           ; edx = destination row pitch

  REPEAT 3                              ; rows 0..5, two rows per pass
        mov     [edi], eax
        mov     [edi+4], eax
        add     edi, edx
        mov     [edi], ebx
        mov     [edi+4], ebx
        add     edi, edx
  ENDM
        mov     [edi], eax              ; row 6
        mov     [edi+4], eax
        add     edi, edx
        mov     [edi], ebx              ; row 7 (edi is left on this row)
        mov     [edi+4], ebx

        sub     edi, nfpk_back_right    ; original note: (SHEIGHT-1)*width+8

        retn
|
|
|
|
nfPkDecomp ENDP
|
|
|
|
; Half vertical resolution version (skip odd lines)
|
|
;
|
|
nfPkDecompH PROC USES ESI EDI EBX, \
|
|
ops:PTRBYTE, comp:PTRBYTE, \
|
|
x:DWORD, y:DWORD, w:DWORD, h:DWORD
|
|
LOCAL tbuf: PTRBYTE
|
|
LOCAL new_row:DWORD
|
|
LOCAL DiffBufPtrs:DWORD
|
|
|
|
LOCAL nfpk_back_right: DWORD
|
|
LOCAL wcnt:DWORD
|
|
|
|
LOG_LABEL "StartPkDecomp"
|
|
|
|
.data
|
|
nfpk_OpTblH label dword
|
|
dword offset nf0 ; Prev Same (0)
|
|
dword offset nf1 ; No change (and copied to screen) (0)
|
|
dword offset nf2 ; Near shift from older part of current buf (1)
|
|
dword offset nf3 ; Near shift from newer part of current buf (1)
|
|
dword offset nf4 ; Near shift from previous buffer (1)
|
|
dword offset nf5 ; Far shift from previous buffer (2)
|
|
dword offset nf6 ; Far shift from current buffer (2)
|
|
; [Or if COMPOPS, run of no changes (0)]
|
|
dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
|
|
dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
|
|
dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
|
|
; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
|
|
dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 8x4x2 (24 bytes)
|
|
dword offset nf11 ; 8x8x8 (64 bytes)
|
|
dword offset nf12 ; low 4x4x8 (16 bytes)
|
|
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
|
|
dword offset nf14 ; 8x8x0 (1 byte)
|
|
dword offset nf15 ; mix 8x8x0 (2 bytes)
|
|
.code
|
|
|
|
ifdef SYMANTEC
|
|
mov ebx, ds ; Allow DS to access code
|
|
mov ecx, 0
|
|
mov ax, 3505h
|
|
int 21h
|
|
endif
|
|
|
|
NF_DECOMP_INIT 0
|
|
|
|
mov eax, nf_width
|
|
shl eax, 2
|
|
sub eax, nf_new_w
|
|
mov new_row, eax
|
|
|
|
shr nf_new_h, 1
|
|
|
|
mov eax, nf_width
|
|
lea eax, [eax*2+eax-SWIDTH]
|
|
mov nfpk_back_right, eax
|
|
|
|
mov esi, comp
|
|
mov edi, tbuf
|
|
nf_StartRow:
;
; Row / pair dispatcher.  Each opcode byte carries two 4-bit opcodes (low
; nibble first), i.e. it describes a pair of adjacent squares; wcnt counts
; the pairs left in the current row of squares and h counts the rows.
;
; Handlers are entered with eax = low nibble, ebx = high nibble, and the
; stack holding (top first) the second handler's address and nf_NextPair,
; so each handler's retn chains to the next step.
;
        mov     eax, w
        shr     eax, 1                  ; pairs per row = w / 2
        mov     wcnt, eax
        ALIGN 4
nf_NextPair:
        dec     wcnt
        js      nf_NextRow              ; row exhausted
        mov     ebx, ops
        movzx   eax, byte ptr [ebx]     ; fetch the next opcode byte
        inc     ebx
        mov     ops, ebx

        mov     ebx, eax
        shr     ebx, 4                  ; ebx = second (high-nibble) opcode
        and     eax, 0Fh                ; eax = first (low-nibble) opcode
        push    offset nf_NextPair      ; return target after the second handler
        push    nfpk_OpTblH[ebx*4]      ; second handler runs when the first retn's
        jmp     nfpk_OpTblH[eax*4]      ; run the first handler

nf_NextRow:
        add     edi, new_row            ; skip to the start of the next row of squares
        dec     h
        jnz     nf_StartRow
        LOG_LABEL "EndPkDecomp"

  ifdef SYMANTEC
        mov     ebx, ds                 ; Disable DS from accessing code
        mov     ecx, offset DGROUP:_data_bottom[-1]
        mov     ax, 3505h
        int     21h
  endif
        ret
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf0:                                    ; No change from previous buffer
; Copies the square from the same position in the other buffer:
; nf_shift reads from [edi+eax], and eax is set to DiffBufPtrs here
; (presumably previous-buffer address minus current-buffer address --
; confirm against NF_DECOMP_INIT).
        mov     eax, DiffBufPtrs
        jmp     nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf1:                                    ; No change (and copied to screen)
; Nothing to decode: leave the square untouched and step edi to the next one.
        add     edi, SWIDTH
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf2:                                    ; Near shift from older part of current buffer
;
; One stream byte indexes nfpk_ShiftP2, giving a packed (x in al, y in ah)
; displacement; the square is then copied from that displacement inside the
; current buffer.
;   Out:   esi advanced by 1; exits through nf_shift
;   Clobb: eax, ebx, flags
;
        movzx   eax, byte ptr [esi]     ; shift code
        inc     esi
        mov     ax, nfpk_ShiftP2[eax*2] ; al = x shift, ah = y shift (signed bytes)

; nf_xyc_shift: in al = x, ah = y.  Builds the byte offset for nf_shift.
nf_xyc_shift:
        movzx   ebx, ah                 ; ebx = y as an unsigned byte index
        movsx   eax, al                 ; eax = sign-extended x
        add     bl, 080h                ; CF = sign of y ...
        adc     bl, 080h                ; ... so negative y gets +1 before halving
        sar     bl, 1                   ; bl = y / 2, rounded toward zero (half vertical res)
        add     eax, nfpk_ShiftY[ebx*4] ; add the table's offset for that row shift
        jmp     nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf3:                                    ; Near shift from newer part of current buffer
; Same table lookup as nf2, but both displacement components are negated
; before the offset is computed by nf_xyc_shift.
;   Out:   esi advanced by 1; exits through nf_xyc_shift / nf_shift
        movzx   eax, byte ptr [esi]     ; shift code
        inc     esi
        mov     ax, nfpk_ShiftP2[eax*2] ; al = x shift, ah = y shift
        neg     al                      ; mirror x
        neg     ah                      ; mirror y
        jmp     nf_xyc_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf4:                                    ; Near shift from previous buffer
; One stream byte indexes nfpk_ShiftP1 for a packed (x, y) displacement;
; nf_xyp_shift then adds DiffBufPtrs so the copy reads the other buffer.
;   Out:   esi advanced by 1; exits through nf_xyp_shift / nf_shift
        movzx   eax, byte ptr [esi]     ; shift code
        inc     esi
        mov     ax, nfpk_ShiftP1[eax*2] ; al = x shift, ah = y shift
        jmp     nf_xyp_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf5:                                    ; Far shift from previous buffer
;
; The displacement is given literally: two stream bytes, x then y.
;   Out:   esi advanced by 2; exits through nf_shift
;   Clobb: eax, ebx, flags
;
        mov     ax, [esi]               ; al = x shift, ah = y shift (signed bytes)
        add     esi, 2

; nf_xyp_shift: in al = x, ah = y.  Builds the offset into the other buffer.
nf_xyp_shift:
        movzx   ebx, ah                 ; ebx = y as an unsigned byte index
        movsx   eax, al                 ; eax = sign-extended x
        add     bl, 080h                ; CF = sign of y ...
        adc     bl, 080h                ; ... so negative y gets +1 before halving
        sar     bl, 1                   ; bl = y / 2, rounded toward zero (half vertical res)
        add     eax, nfpk_ShiftY[ebx*4] ; add the table's offset for that row shift
        add     eax, DiffBufPtrs        ; redirect the source to the other buffer
        jmp     nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
|
|
if COMPOPS

nf6:    ; Run of no changes (must only appear in first nibble opcodes)
        ; Next nibble k specifies 2k+4 squares with no changes
        ;
        ; In:  ebx = k (the byte's high nibble, as left by the dispatcher)
        ;      [esp] = address of the second-nibble handler, which is dropped
        ; Squares are skipped a pair at a time, so k+2 pairs are stepped over,
        ; wrapping to following rows of squares when the current one runs out.
        add     ebx, 2                  ; (minimum of 4 squares) -> pairs to skip
        add     esp, 4                  ; Next nibble is not an opcode
        ALIGN 4
nf6a:   add     edi, SWIDTH*2           ; Advance over two squares
        dec     ebx
        jz      nf6z                    ; Last pair of squares
        dec     wcnt                    ; Same row?
        jns     nf6a                    ; Yes

        ; Row exhausted: the next pair skipped is the first of a new row.
        dec     h                       ; Decrement row count (should never become zero here)
        add     edi, new_row            ; Advance to next row
        mov     eax, w                  ; Reset wcnt to pairs-per-row minus the
        shr     eax, 1                  ; pair the loop is about to skip
        dec     eax
        mov     wcnt, eax
        jmp     nf6a

nf6z:   retn                            ; returns to nf_NextPair

else

nf6:    ; Far shift from current buffer
        mov     ax, [esi]               ; al = x shift, ah = y shift
        add     esi, 2
        jmp     nf_xyc_shift
endif
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf_shift:
;
; Copy one square (8 bytes wide, 4 rows in this half-height decoder) from
; [edi+eax] to [edi].
;   In:    eax = signed byte offset from destination to source
;          edi = top-left of the destination square
;   Out:   edi adjusted by -nfpk_back_right; esi preserved (parked in ebx)
;   Clobb: eax, ebx, edx, flags
;
; Each row is copied low dword first and rows go top to bottom; source and
; destination can lie in the same buffer, so this order is kept as is.
;
  if 0  ;debug
        mov     eax, 0
        mov     ebx, eax
        jmp     nf_solid
  endif

; Copy the 8 bytes of the current row.
NFH_SHIFT_COPY8 MACRO
        mov     eax, [esi]
        mov     [edi], eax
        mov     eax, [esi+4]
        mov     [edi+4], eax
        ENDM

        mov     ebx, esi                ; save esi
        lea     esi, [edi+eax]          ; esi = source square
        mov     edx, nf_width           ; edx = row pitch

        NFH_SHIFT_COPY8                 ; row 0
        add     esi, edx
        add     edi, edx
        NFH_SHIFT_COPY8                 ; row 1
        add     esi, edx
        add     edi, edx
        NFH_SHIFT_COPY8                 ; row 2
        add     esi, edx
        add     edi, edx
        NFH_SHIFT_COPY8                 ; row 3 (pointers stay on this row)

        sub     edi, nfpk_back_right    ; this proc sets it to 3*nf_width - SWIDTH:
                                        ; up three rows, right one square
        mov     esi, ebx                ; restore esi
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf7:                                    ; 8x8x1 (10 bytes)
;
; Two-color block with one mask bit per pixel: 2 color bytes followed by
; 8 mask bytes (one per row).  This half-height decoder uses the mask bytes
; at offsets 2, 4, 6 and 8 only and writes 4 rows.
; If the first color byte is above the second, the data is the 4-byte
; "low 4x4x1" form handled by nf23.
;
; SELF-MODIFYING: for every 2-pixel group the mask selects one of four
; registers -- bx, dx, cx, bp = color pairs 00, 01, 10, 11 -- by rewriting a
; byte of the matching "mov ax, bx" template below.  edx points 2 bytes into
; a template (the register-select byte of the prefixed 16-bit mov); the
; replacement bytes come from nfpk_mov8, one dword per mask value.
;
;   In:    esi = compressed data, edi = top-left of the destination block
;   Out:   esi advanced by 10, edi adjusted by -nfpk_back_right
;   Clobb: eax, ebx, ecx, edx, flags (ebp and esi are saved and restored)
;

; Patch the four templates t1..t4 from the mask byte at [esi+maskOfs].
; anchor is the template edx currently points into.
NFH7_PATCH MACRO maskOfs, anchor, t1, t2, t3, t4
        mov     al, [esi+maskOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(t1-anchor)], bl
        mov     [edx+(t2-anchor)], bh
        shr     ebx, 16
        mov     [edx+(t3-anchor)], bl
        mov     [edx+(t4-anchor)], bh
        ENDM

; One 8-pixel row: four patched templates, two dword stores.
NFH7_ROW MACRO t1, t2, t3, t4
t1:     mov     ax, bx
        shl     eax, 16
t2:     mov     ax, bx
        mov     [edi], eax
t3:     mov     ax, bx
        shl     eax, 16
t4:     mov     ax, bx
        mov     [edi+4], eax
        ENDM

        mov     ax, [esi]
        cmp     al, ah
        ja      nf23

  if 0  ;debug
        add     esi, 10
        mov     eax, 0fefefefeH
        mov     ebx, eax
        jmp     nf_solid
  endif
        xor     eax, eax                ; keep eax[31:8] zero for table indexing
        lea     ecx, nfpk_mov8
        lea     edx, byte ptr ds:nf7_11+2

        NFH7_PATCH 2, nf7_11, nf7_11, nf7_12, nf7_13, nf7_14
        NFH7_PATCH 4, nf7_11, nf7_31, nf7_32, nf7_33, nf7_34

        lea     edx, [edx+(nf7_51-nf7_11)]      ; re-anchor on the third row

        NFH7_PATCH 6, nf7_51, nf7_51, nf7_52, nf7_53, nf7_54
        NFH7_PATCH 8, nf7_51, nf7_71, nf7_72, nf7_73, nf7_74

        push    ebp                     ; ebp doubles as a color register below,
        push    esi                     ; so PROC locals are off limits until the pop
        ; load bx,dx,cx,bp with 00,01,10,11 color combinations
        ; (note that bits are read least significant first).
        mov     cx, [esi]
        mov     esi, nf_width           ; esi = row pitch while templates run
        mov     bl, cl
        mov     bh, cl
        mov     dl, ch
        mov     dh, cl
        mov     al, ch
        mov     ah, ch
        mov     ebp, eax
        jmp     nf7_0                   ; flush prefetch
        ALIGN 4
nf7_0:
        NFH7_ROW nf7_11, nf7_12, nf7_13, nf7_14
        add     edi, esi

        NFH7_ROW nf7_31, nf7_32, nf7_33, nf7_34
        add     edi, esi

        NFH7_ROW nf7_51, nf7_52, nf7_53, nf7_54
        add     edi, esi

        NFH7_ROW nf7_71, nf7_72, nf7_73, nf7_74

        pop     esi
        pop     ebp
        add     esi, 10
        sub     edi, nfpk_back_right    ; up three rows, right one square

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf7+16
|
|
nf23:                                   ; low 4x4x1 (4 bytes)
;
; Low-resolution two-color block: 2 color bytes followed by 2 mask bytes,
; each nibble describing one row of four double-width pixels.  The four
; nibbles produce the four rows written by this half-height decoder.
;
; SELF-MODIFYING: each nibble indexes nfpk_mov4l, and the four bytes of the
; table entry are written over a byte (label+2) of that row's four
; "mov ax, bx" templates, choosing bx (color 0 twice) or cx (color 1 twice).
;
;   In:    esi = compressed data, edi = top-left of the destination block
;   Out:   esi advanced by 4, edi adjusted by -nfpk_back_right
;   Clobb: eax, ebx, ecx, edx, flags
;

; Patch templates t1..t4 from one nibble of the byte at [esi+maskOfs]
; (hiNibble = 0: low nibble, 1: high nibble).
NFH23_PATCH MACRO maskOfs, hiNibble, t1, t2, t3, t4
        mov     al, [esi+maskOfs]
  IF hiNibble
        shr     al, 4
  ELSE
        and     al, 0fH
  ENDIF
        mov     ebx, [ecx+eax*4]
        mov     [edx+(t1-nf23_11)], bl
        mov     [edx+(t2-nf23_11)], bh
        shr     ebx, 16
        mov     [edx+(t3-nf23_11)], bl
        mov     [edx+(t4-nf23_11)], bh
        ENDM

; One 8-pixel row: four patched templates, two dword stores.
NFH23_ROW MACRO t1, t2, t3, t4
t1:     mov     ax, bx
        shl     eax, 16
t2:     mov     ax, bx
        mov     [edi], eax

t3:     mov     ax, bx
        shl     eax, 16
t4:     mov     ax, bx
        mov     [edi+4], eax
        ENDM

        xor     eax, eax                ; keep eax[31:8] zero for table indexing
        lea     ecx, nfpk_mov4l
        lea     edx, byte ptr ds:nf23_11+2

        NFH23_PATCH 2, 0, nf23_11, nf23_12, nf23_13, nf23_14
        NFH23_PATCH 2, 1, nf23_31, nf23_32, nf23_33, nf23_34
        NFH23_PATCH 3, 0, nf23_51, nf23_52, nf23_53, nf23_54
        NFH23_PATCH 3, 1, nf23_71, nf23_72, nf23_73, nf23_74

        mov     edx, nf_width           ; edx = row pitch

        ; load bx,cx with 00,11 color combinations
        mov     bx, [esi]
        mov     cl, bh
        mov     bh, bl
        mov     ch, cl

        jmp     nf23_0                  ; flush prefetch
        ALIGN 4
nf23_0:
        NFH23_ROW nf23_11, nf23_12, nf23_13, nf23_14
        add     edi, edx

        NFH23_ROW nf23_31, nf23_32, nf23_33, nf23_34
        add     edi, edx

        NFH23_ROW nf23_51, nf23_52, nf23_53, nf23_54
        add     edi, edx

        NFH23_ROW nf23_71, nf23_72, nf23_73, nf23_74

        sub     edi, nfpk_back_right    ; up three rows, right one square
        add     esi, 4
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf8:                                    ; 2x2 4x4x1 (16 bytes)
;
; Four 4x4 quadrants, each with its own 2 colors and 2 mask bytes (4 bytes
; per quadrant at offsets 0, 4, 8, 12).  The quadrants are drawn in the
; order: left column top, left column bottom, right column top, right
; column bottom.  Of each mask byte only the low nibble (two table bytes)
; is used, so every quadrant contributes 2 of the 4 rows written here.
; If the first color byte is above the second, the data is one of the
; 12-byte forms handled by nf24 / nf40.
;
; SELF-MODIFYING: table bytes from nfpk_mov8 are written at label+2 of the
; "mov ax, bx" templates to select bx, dx, cx or bp (color pairs 00,01,10,11).
;
;   In:    esi = compressed data, edi = top-left of the destination block
;   Out:   esi advanced by 16, edi adjusted by -nfpk_back_right
;   Clobb: eax, ebx, ecx, edx, flags (ebp and esi are saved and restored)
;

; Patch the two templates of one 4-pixel row from the byte at [esi+maskOfs].
NFH8_PATCH MACRO maskOfs, anchor, t1, t2
        mov     al, [esi+maskOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(t1-anchor)], bl
        mov     [edx+(t2-anchor)], bh
        ENDM

; Spread the color pair in cx (cl = color 0, ch = color 1) over bx, dx, bp.
NFH8_COLORS MACRO
        mov     bl, cl
        mov     bh, cl
        mov     dl, ch
        mov     dh, cl
        mov     al, ch
        mov     ah, ch
        mov     ebp, eax
        ENDM

; Fetch the color pair at colorOfs from the saved stream pointer on the stack.
NFH8_RELOAD MACRO colorOfs
        mov     eax, [esp]
        mov     cx, [eax+colorOfs]
        NFH8_COLORS
        ENDM

; One 4-pixel row: two patched templates and a dword store.
NFH8_ROW MACRO t1, t2
t1:     mov     ax, bx
        shl     eax, 16
t2:     mov     ax, bx
        mov     [edi], eax
        ENDM

        mov     ax, [esi]
        cmp     al, ah
        ja      nf24

        xor     eax, eax                ; keep eax[31:8] zero for table indexing
        lea     ecx, nfpk_mov8
        lea     edx, byte ptr ds:nf8_11+2

        NFH8_PATCH 2,  nf8_11, nf8_11, nf8_12
        NFH8_PATCH 3,  nf8_11, nf8_21, nf8_22

        NFH8_PATCH 6,  nf8_11, nf8_31, nf8_32
        NFH8_PATCH 7,  nf8_11, nf8_41, nf8_42

        add     edx, nf8_51-nf8_11      ; re-anchor on the right column

        NFH8_PATCH 10, nf8_51, nf8_51, nf8_52
        NFH8_PATCH 11, nf8_51, nf8_61, nf8_62

        NFH8_PATCH 14, nf8_51, nf8_71, nf8_72
        NFH8_PATCH 15, nf8_51, nf8_81, nf8_82

        push    ebp                     ; ebp doubles as a color register below,
        push    esi                     ; so PROC locals are off limits until the pop
        ; load bx,dx,cx,bp with 00,01,10,11 color combinations
        ; (note that bits are read least significant first).
        mov     cx, [esi]
        mov     esi, nf_width           ; esi = row pitch while templates run
        NFH8_COLORS

        jmp     nf8_0                   ; flush prefetch
        ALIGN 4
nf8_0:
        NFH8_ROW nf8_11, nf8_12         ; left column, first quadrant
        add     edi, esi
        NFH8_ROW nf8_21, nf8_22
        add     edi, esi

        NFH8_RELOAD 4                   ; colors of the second quadrant
        NFH8_ROW nf8_31, nf8_32
        add     edi, esi
        NFH8_ROW nf8_41, nf8_42
        add     edi, esi

        lea     eax, [esi*4-4]          ; back up four rows, step right 4 pixels
        sub     edi, eax

        NFH8_RELOAD 8                   ; colors of the third quadrant
        NFH8_ROW nf8_51, nf8_52
        add     edi, esi
        NFH8_ROW nf8_61, nf8_62
        add     edi, esi

        NFH8_RELOAD 12                  ; colors of the fourth quadrant
        NFH8_ROW nf8_71, nf8_72
        add     edi, esi
        NFH8_ROW nf8_81, nf8_82

        pop     esi
        pop     ebp
        add     esi, 16
        sub     edi, 4                  ; undo the 4-pixel step to the right
        sub     edi, nfpk_back_right    ; up three rows, right one square

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+16
|
|
nf24:                                   ; 2x1 4x8x1 (12 bytes)
;
; Left and right 4x8 halves, each with 2 colors and 4 mask bytes (6 bytes
; per half at offsets 0 and 6).  Of each mask byte only the low nibble (two
; table bytes) is used, giving the 4 rows written by this half-height decoder.
; If the color byte at offset 6 is above the one at offset 7, the data is
; the "1x2 8x4x1" form handled by nf40.
;
; SELF-MODIFYING: table bytes from nfpk_mov8 are written at label+2 of the
; "mov ax, bx" templates to select bx, dx, cx or bp (color pairs 00,01,10,11).
;
;   In:    esi = compressed data, edi = top-left of the destination block
;   Out:   esi advanced by 12, edi adjusted by -nfpk_back_right
;   Clobb: eax, ebx, ecx, edx, flags (ebp and esi are saved and restored)
;

; Patch the two templates of one 4-pixel row from the byte at [esi+maskOfs].
NFH24_PATCH MACRO maskOfs, anchor, t1, t2
        mov     al, [esi+maskOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(t1-anchor)], bl
        mov     [edx+(t2-anchor)], bh
        ENDM

; Spread the color pair in cx (cl = color 0, ch = color 1) over bx, dx, bp.
NFH24_COLORS MACRO
        mov     bl, cl
        mov     bh, cl
        mov     dl, ch
        mov     dh, cl
        mov     al, ch
        mov     ah, ch
        mov     ebp, eax
        ENDM

; One 4-pixel row: two patched templates and a dword store.
NFH24_ROW MACRO t1, t2
t1:     mov     ax, bx
        shl     eax, 16
t2:     mov     ax, bx
        mov     [edi], eax
        ENDM

        mov     ax, [esi+6]
        cmp     al, ah
        ja      nf40

        xor     eax, eax                ; keep eax[31:8] zero for table indexing
        lea     ecx, nfpk_mov8
        lea     edx, byte ptr ds:nf24_11+2

        NFH24_PATCH 2,  nf24_11, nf24_11, nf24_12
        NFH24_PATCH 3,  nf24_11, nf24_21, nf24_22
        NFH24_PATCH 4,  nf24_11, nf24_31, nf24_32
        NFH24_PATCH 5,  nf24_11, nf24_41, nf24_42

        add     edx, nf24_51-nf24_11    ; re-anchor on the right half

        NFH24_PATCH 8,  nf24_51, nf24_51, nf24_52
        NFH24_PATCH 9,  nf24_51, nf24_61, nf24_62
        NFH24_PATCH 10, nf24_51, nf24_71, nf24_72
        NFH24_PATCH 11, nf24_51, nf24_81, nf24_82

        push    ebp                     ; ebp doubles as a color register below,
        push    esi                     ; so PROC locals are off limits until the pop
        ; load bx,dx,cx,bp with 00,01,10,11 color combinations
        ; (note that bits are read least significant first).
        mov     cx, [esi]
        mov     esi, nf_width           ; esi = row pitch while templates run
        NFH24_COLORS

        jmp     nf24_0                  ; flush prefetch
        ALIGN 4
nf24_0:
        NFH24_ROW nf24_11, nf24_12      ; left half, four rows
        add     edi, esi
        NFH24_ROW nf24_21, nf24_22
        add     edi, esi
        NFH24_ROW nf24_31, nf24_32
        add     edi, esi
        NFH24_ROW nf24_41, nf24_42
        add     edi, esi

        lea     eax, [esi*4-4]          ; back up four rows, step right 4 pixels
        sub     edi, eax

        mov     eax, [esp]              ; saved stream pointer
        mov     cx, [eax+6]             ; colors of the right half
        NFH24_COLORS

        NFH24_ROW nf24_51, nf24_52      ; right half, four rows
        add     edi, esi
        NFH24_ROW nf24_61, nf24_62
        add     edi, esi
        NFH24_ROW nf24_71, nf24_72
        add     edi, esi
        NFH24_ROW nf24_81, nf24_82

        pop     esi
        pop     ebp
        add     esi, 12
        sub     edi, 4                  ; undo the 4-pixel step to the right
        sub     edi, nfpk_back_right    ; up three rows, right one square

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+32
|
|
nf40:                                   ; 1x2 8x4x1 (12 bytes)
;
; Top and bottom 8x4 halves, each with 2 colors and 4 mask bytes (6 bytes
; per half at offsets 0 and 6).  This half-height decoder uses every other
; mask byte -- offsets 2, 4 for the top half and 8, 10 for the bottom --
; and writes 4 rows.
;
; SELF-MODIFYING: table bytes from nfpk_mov8 are written at label+2 of the
; "mov ax, bx" templates to select bx, dx, cx or bp (color pairs 00,01,10,11).
;
;   In:    esi = compressed data, edi = top-left of the destination block
;   Out:   esi advanced by 12, edi adjusted by -nfpk_back_right
;   Clobb: eax, ebx, ecx, edx, flags (ebp and esi are saved and restored)
;

; Patch the four templates of one 8-pixel row from the byte at [esi+maskOfs].
NFH40_PATCH MACRO maskOfs, anchor, t1, t2, t3, t4
        mov     al, [esi+maskOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(t1-anchor)], bl
        mov     [edx+(t2-anchor)], bh
        shr     ebx, 16
        mov     [edx+(t3-anchor)], bl
        mov     [edx+(t4-anchor)], bh
        ENDM

; Spread the color pair in cx (cl = color 0, ch = color 1) over bx, dx, bp.
NFH40_COLORS MACRO
        mov     bl, cl
        mov     bh, cl
        mov     dl, ch
        mov     dh, cl
        mov     al, ch
        mov     ah, ch
        mov     ebp, eax
        ENDM

; One 8-pixel row: four patched templates, two dword stores.
NFH40_ROW MACRO t1, t2, t3, t4
t1:     mov     ax, bx
        shl     eax, 16
t2:     mov     ax, bx
        mov     [edi], eax
t3:     mov     ax, bx
        shl     eax, 16
t4:     mov     ax, bx
        mov     [edi+4], eax
        ENDM

        xor     eax, eax                ; keep eax[31:8] zero for table indexing
        lea     ecx, nfpk_mov8
        lea     edx, byte ptr ds:nf40_11+2

        NFH40_PATCH 2,  nf40_11, nf40_11, nf40_12, nf40_13, nf40_14
        NFH40_PATCH 4,  nf40_11, nf40_31, nf40_32, nf40_33, nf40_34

        add     edx, nf40_51-nf40_11    ; re-anchor on the bottom half

        NFH40_PATCH 8,  nf40_51, nf40_51, nf40_52, nf40_53, nf40_54
        NFH40_PATCH 10, nf40_51, nf40_71, nf40_72, nf40_73, nf40_74

        push    ebp                     ; ebp doubles as a color register below,
        push    esi                     ; so PROC locals are off limits until the pop
        ; load bx,dx,cx,bp with 00,01,10,11 color combinations
        ; (note that bits are read least significant first).
        mov     cx, [esi]
        mov     esi, nf_width           ; esi = row pitch while templates run
        NFH40_COLORS

        jmp     nf40_0                  ; flush prefetch
        ALIGN 4
nf40_0:
        NFH40_ROW nf40_11, nf40_12, nf40_13, nf40_14    ; top half
        add     edi, esi

        NFH40_ROW nf40_31, nf40_32, nf40_33, nf40_34
        add     edi, esi

        mov     eax, [esp]              ; saved stream pointer
        mov     cx, [eax+6]             ; colors of the bottom half
        NFH40_COLORS

        NFH40_ROW nf40_51, nf40_52, nf40_53, nf40_54    ; bottom half
        add     edi, esi

        NFH40_ROW nf40_71, nf40_72, nf40_73, nf40_74

        pop     esi
        pop     ebp
        add     esi, 12
        sub     edi, nfpk_back_right    ; up three rows, right one square

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf9:                                    ; 8x8x2 (20 bytes)
;
; Four-color block with two mask bits per pixel: 4 color bytes followed by
; 16 mask bytes (2 per row).  This half-height decoder uses the mask bytes
; of every other row -- offsets 4/5, 8/9, 12/13, 16/17 -- and writes 4 rows.
; The ordering of the color bytes selects the variant: byte0 > byte1 goes
; to nf41 (with eax still holding the four colors), otherwise byte2 > byte3
; goes to nf25.
;
; SELF-MODIFYING: table bytes from nfpk_mov4 are written at label+1 (the
; second byte of each 2-byte "mov al/ah, bl" template) to select which of
; bl, bh, cl, ch supplies that pixel.
;
;   In:    esi = compressed data, edi = top-left of the destination block
;   Out:   esi advanced by 20, edi adjusted by -nfpk_back_right
;   Clobb: eax, ebx, ecx, edx, flags
;

; Patch the four templates of one 4-pixel group from the byte at [esi+maskOfs].
NFH9_PATCH MACRO maskOfs, anchor, t1, t2, t3, t4
        mov     al, [esi+maskOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(t1-anchor)], bl
        mov     [edx+(t2-anchor)], bh
        shr     ebx, 16
        mov     [edx+(t3-anchor)], bl
        mov     [edx+(t4-anchor)], bh
        ENDM

; Four pixels: t1/t2 build the high word, t3/t4 the low word, then store.
NFH9_QUAD MACRO t1, t2, t3, t4, dst
t1:     mov     al, bl
t2:     mov     ah, bl
        shl     eax, 16
t3:     mov     al, bl
t4:     mov     ah, bl
        mov     dst, eax
        ENDM

        mov     eax, [esi]
        cmp     al, ah
        ja      nf41

        shr     eax, 16
        cmp     al, ah
        ja      nf25

        xor     eax, eax                ; keep eax[31:8] zero for table indexing
        lea     ecx, nfpk_mov4
        lea     edx, byte ptr ds:nf9_11+1

        NFH9_PATCH 4,  nf9_11, nf9_11, nf9_12, nf9_13, nf9_14
        NFH9_PATCH 5,  nf9_11, nf9_15, nf9_16, nf9_17, nf9_18

        NFH9_PATCH 8,  nf9_11, nf9_31, nf9_32, nf9_33, nf9_34
        NFH9_PATCH 9,  nf9_11, nf9_35, nf9_36, nf9_37, nf9_38

        lea     edx, [edx+(nf9_51-nf9_11)]      ; re-anchor on the third row

        NFH9_PATCH 12, nf9_51, nf9_51, nf9_52, nf9_53, nf9_54
        NFH9_PATCH 13, nf9_51, nf9_55, nf9_56, nf9_57, nf9_58

        NFH9_PATCH 16, nf9_51, nf9_71, nf9_72, nf9_73, nf9_74
        NFH9_PATCH 17, nf9_51, nf9_75, nf9_76, nf9_77, nf9_78

        ; Load bl,bh,cl,ch with four colors
        mov     bx, [esi]
        mov     cx, [esi+2]

        mov     edx, nf_width           ; edx = row pitch
        jmp     nf9_0                   ; flush prefetch
        ALIGN 4
nf9_0:
        NFH9_QUAD nf9_11, nf9_12, nf9_13, nf9_14, <[edi]>

        NFH9_QUAD nf9_15, nf9_16, nf9_17, nf9_18, <[edi+4]>
        add     edi, edx

        NFH9_QUAD nf9_31, nf9_32, nf9_33, nf9_34, <[edi]>

        NFH9_QUAD nf9_35, nf9_36, nf9_37, nf9_38, <[edi+4]>
        add     edi, edx

        NFH9_QUAD nf9_51, nf9_52, nf9_53, nf9_54, <[edi]>

        NFH9_QUAD nf9_55, nf9_56, nf9_57, nf9_58, <[edi+4]>
        add     edi, edx

        NFH9_QUAD nf9_71, nf9_72, nf9_73, nf9_74, <[edi]>

        NFH9_QUAD nf9_75, nf9_76, nf9_77, nf9_78, <[edi+4]>

        add     esi, 20
        sub     edi, nfpk_back_right    ; up three rows, right one square
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+16
|
|
nf25:                                   ; low 4x4x2 (8 bytes)
;
; Low-resolution four-color block: 4 color bytes followed by 4 mask bytes,
; one per row of four double-width pixels.  All four mask bytes (offsets
; 4..7) are used, one per row written by this half-height decoder.
;
; SELF-MODIFYING: table bytes from nfpk_mov4 are written at label+1 of the
; templates.  Each template loads one pixel (into ah or al) and the next
; instruction duplicates it, so the table bytes are applied to the labels
; in reverse order (t4, t3, t2, t1).
;
;   In:    esi = compressed data, edi = top-left of the destination block
;   Out:   esi advanced by 8, edi adjusted by -nfpk_back_right
;   Clobb: eax, ebx, ecx, edx, flags
;

; Patch one row's templates t1..t4 from the mask byte at [esi+maskOfs].
NFH25_PATCH MACRO maskOfs, t1, t2, t3, t4
        mov     al, [esi+maskOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(t4-nf25_11)], bl
        mov     [edx+(t3-nf25_11)], bh
        shr     ebx, 16
        mov     [edx+(t2-nf25_11)], bl
        mov     [edx+(t1-nf25_11)], bh
        ENDM

; One row of four doubled pixels: patched templates plus two dword stores.
NFH25_ROW MACRO t1, t2, t3, t4
t1:     mov     ah, bl
        mov     al, ah
        shl     eax, 16
t2:     mov     al, bl
        mov     ah, al
        mov     [edi], eax
t3:     mov     ah, bl
        mov     al, ah
        shl     eax, 16
t4:     mov     al, bl
        mov     ah, al
        mov     [edi+4], eax
        ENDM

  if 0  ;debug
        mov     eax, 0
        mov     ebx, 0
        add     esi, 8
        jmp     nf_solid
  endif

        xor     eax, eax                ; keep eax[31:8] zero for table indexing
        lea     ecx, nfpk_mov4
        lea     edx, byte ptr ds:nf25_11+1

        NFH25_PATCH 4, nf25_11, nf25_12, nf25_13, nf25_14
        NFH25_PATCH 5, nf25_21, nf25_22, nf25_23, nf25_24

        NFH25_PATCH 6, nf25_31, nf25_32, nf25_33, nf25_34
        NFH25_PATCH 7, nf25_41, nf25_42, nf25_43, nf25_44

        ; Load bl,bh,cl,ch with four colors
        mov     bx, [esi]
        mov     cx, [esi+2]

        mov     edx, nf_width           ; edx = row pitch
        jmp     nf25_0                  ; flush prefetch
        ALIGN 4
nf25_0:
        NFH25_ROW nf25_11, nf25_12, nf25_13, nf25_14
        add     edi, edx

        NFH25_ROW nf25_21, nf25_22, nf25_23, nf25_24
        add     edi, edx

        NFH25_ROW nf25_31, nf25_32, nf25_33, nf25_34
        add     edi, edx

        NFH25_ROW nf25_41, nf25_42, nf25_43, nf25_44

        add     esi, 8
        sub     edi, nfpk_back_right    ; up three rows, right one square

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+32
|
|
nf41:                                   ; low 4x8x2 (12 bytes)
;
; Four-color block at half horizontal resolution: 4 color bytes followed by
; 8 mask bytes, one per row of four double-width pixels.  This half-height
; decoder uses every other mask byte (offsets 4, 6, 8, 10) and writes 4 rows.
; Entered from nf9 with eax = the four color bytes; if byte2 > byte3 the
; data is the "low 8x4x2" form handled by nf57.
;
; SELF-MODIFYING: table bytes from nfpk_mov4 are written at label+1 of the
; templates.  Each template loads one pixel (into ah or al) and the next
; instruction duplicates it, so the table bytes are applied to the labels
; in reverse order (t4, t3, t2, t1).
;
;   In:    eax = dword at [esi], esi = compressed data,
;          edi = top-left of the destination block
;   Out:   esi advanced by 12, edi adjusted by -nfpk_back_right
;   Clobb: eax, ebx, ecx, edx, flags
;

; Patch one row's templates t1..t4 from the mask byte at [esi+maskOfs].
NFH41_PATCH MACRO maskOfs, anchor, t1, t2, t3, t4
        mov     al, [esi+maskOfs]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(t4-anchor)], bl
        mov     [edx+(t3-anchor)], bh
        shr     ebx, 16
        mov     [edx+(t2-anchor)], bl
        mov     [edx+(t1-anchor)], bh
        ENDM

; One row of four doubled pixels: patched templates plus two dword stores.
NFH41_ROW MACRO t1, t2, t3, t4
t1:     mov     ah, bl
        mov     al, ah
        shl     eax, 16
t2:     mov     al, bl
        mov     ah, al
        mov     [edi], eax
t3:     mov     ah, bl
        mov     al, ah
        shl     eax, 16
t4:     mov     al, bl
        mov     ah, al
        mov     [edi+4], eax
        ENDM

        shr     eax, 16                 ; al = color byte 2, ah = color byte 3
        cmp     al, ah
        ja      nf57

        xor     eax, eax                ; keep eax[31:8] zero for table indexing
        lea     ecx, nfpk_mov4
        lea     edx, byte ptr ds:nf41_11+1

        NFH41_PATCH 4,  nf41_11, nf41_11, nf41_12, nf41_13, nf41_14
        NFH41_PATCH 6,  nf41_11, nf41_31, nf41_32, nf41_33, nf41_34

        lea     edx, [edx+(nf41_51-nf41_11)]    ; re-anchor on the third row

        NFH41_PATCH 8,  nf41_51, nf41_51, nf41_52, nf41_53, nf41_54
        NFH41_PATCH 10, nf41_51, nf41_71, nf41_72, nf41_73, nf41_74

        ; Load bl,bh,cl,ch with four colors
        mov     bx, [esi]
        mov     cx, [esi+2]

        mov     edx, nf_width           ; edx = row pitch
        jmp     nf41_0                  ; flush prefetch
        ALIGN 4
nf41_0:
        NFH41_ROW nf41_11, nf41_12, nf41_13, nf41_14
        add     edi, edx

        NFH41_ROW nf41_31, nf41_32, nf41_33, nf41_34
        add     edi, edx

        NFH41_ROW nf41_51, nf41_52, nf41_53, nf41_54
        add     edi, edx

        NFH41_ROW nf41_71, nf41_72, nf41_73, nf41_74

        add     esi, 12
        sub     edi, nfpk_back_right    ; up three rows, right one square

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+48
|
|
; nf57: handler for "low 8x4x2" data (12 source bytes), built on self-modifying code.
; Bytes 0-3 of the source are loaded as four colours into bl, bh, cl, ch; bytes
; 4-11 are pattern bytes.  Each pattern byte indexes the dword table nfpk_mov4
; and the four bytes of that entry are stored over the second byte of four of
; the two-byte "mov al/ah, bl" template instructions below (presumably new
; register-select bytes that pick bl, bh, cl or ch -- the table is not visible here).
; The patched template then emits four 8-byte rows at edi, nf_width apart.
; In:  esi = source data, edi = destination.
; Out: esi += 12, edi -= nfpk_back_right.  Clobbers eax, ebx, ecx, edx, flags.
nf57: ; low 8x4x2 (12 bytes)
|
|
; eax is zeroed once; only al is reloaded below, so eax is a clean table index.
xor eax, eax
|
|
lea ecx, nfpk_mov4
|
|
; edx = address of the byte to patch inside the first template instruction.
lea edx, byte ptr ds:nf57_11+1
|
|
|
|
; Pattern byte 4 -> template row 1, first dword (nf57_11..14).
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_11-nf57_11)], bl
|
|
mov [edx+(nf57_12-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_13-nf57_11)], bl
|
|
mov [edx+(nf57_14-nf57_11)], bh
|
|
|
|
; Pattern byte 5 -> template row 1, second dword (nf57_15..18).
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_15-nf57_11)], bl
|
|
mov [edx+(nf57_16-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_17-nf57_11)], bl
|
|
mov [edx+(nf57_18-nf57_11)], bh
|
|
|
|
|
|
; Pattern bytes 6 and 7 -> template row 2.
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_21-nf57_11)], bl
|
|
mov [edx+(nf57_22-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_23-nf57_11)], bl
|
|
mov [edx+(nf57_24-nf57_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_25-nf57_11)], bl
|
|
mov [edx+(nf57_26-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_27-nf57_11)], bl
|
|
mov [edx+(nf57_28-nf57_11)], bh
|
|
|
|
|
|
; Pattern bytes 8 and 9 -> template row 3.
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_31-nf57_11)], bl
|
|
mov [edx+(nf57_32-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_33-nf57_11)], bl
|
|
mov [edx+(nf57_34-nf57_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_35-nf57_11)], bl
|
|
mov [edx+(nf57_36-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_37-nf57_11)], bl
|
|
mov [edx+(nf57_38-nf57_11)], bh
|
|
|
|
|
|
; Pattern bytes 10 and 11 -> template row 4.
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_41-nf57_11)], bl
|
|
mov [edx+(nf57_42-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_43-nf57_11)], bl
|
|
mov [edx+(nf57_44-nf57_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_45-nf57_11)], bl
|
|
mov [edx+(nf57_46-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_47-nf57_11)], bl
|
|
mov [edx+(nf57_48-nf57_11)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
; edx = destination row pitch for the template below.
mov edx, nf_width
|
|
; The jump is taken so the CPU does not execute stale, already-prefetched
; copies of the instructions that were just rewritten.
jmp nf57_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf57_0:
|
|
; --- patched template: every labelled mov below has had its second byte rewritten ---
; Row 1
nf57_11:mov al, bl
|
|
nf57_12:mov ah, bl
|
|
shl eax, 16
|
|
nf57_13:mov al, bl
|
|
nf57_14:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf57_15:mov al, bl
|
|
nf57_16:mov ah, bl
|
|
shl eax, 16
|
|
nf57_17:mov al, bl
|
|
nf57_18:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; Row 2
nf57_21:mov al, bl
|
|
nf57_22:mov ah, bl
|
|
shl eax, 16
|
|
nf57_23:mov al, bl
|
|
nf57_24:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf57_25:mov al, bl
|
|
nf57_26:mov ah, bl
|
|
shl eax, 16
|
|
nf57_27:mov al, bl
|
|
nf57_28:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; Row 3
nf57_31:mov al, bl
|
|
nf57_32:mov ah, bl
|
|
shl eax, 16
|
|
nf57_33:mov al, bl
|
|
nf57_34:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf57_35:mov al, bl
|
|
nf57_36:mov ah, bl
|
|
shl eax, 16
|
|
nf57_37:mov al, bl
|
|
nf57_38:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; Row 4 (no pitch advance afterwards; edi is repositioned below)
nf57_41:mov al, bl
|
|
nf57_42:mov ah, bl
|
|
shl eax, 16
|
|
nf57_43:mov al, bl
|
|
nf57_44:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf57_45:mov al, bl
|
|
nf57_46:mov ah, bl
|
|
shl eax, 16
|
|
nf57_47:mov al, bl
|
|
nf57_48:mov ah, bl
|
|
mov [edi+4], eax
|
|
|
|
; Consume the 12 source bytes and reposition edi for the next square.
add esi, 12
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf10: handler for "2x2 4x4x2" data (32 source bytes), built on self-modifying code.
; If source byte 0 is above byte 1 (unsigned) the data is handed to nf26 instead.
; Otherwise the source is read as four 8-byte groups (offsets 0, 8, 16, 24): four
; colour bytes followed by pattern bytes, of which only those at group offsets
; +4 and +6 are used here.  Each used pattern byte indexes nfpk_mov4 and the
; four bytes of the entry are stored over the second byte of four template
; "mov al/ah, bl" instructions (presumably register-select bytes choosing
; bl, bh, cl or ch -- the table is not visible here).
; Output: eight 4-byte stores -- four rows at edi, then four rows at edi+4.
; In:  esi = source data, edi = destination.
; Out: esi += 32, edi -= nfpk_back_right.  Clobbers eax, ebx, ecx, edx, flags.
nf10: ; 2x2 4x4x2 (32 bytes)
|
|
|
|
mov ax, [esi]
|
|
cmp al, ah
|
|
; Colour-order test selects the alternate layout.
ja nf26
|
|
|
|
; eax is zeroed once; only al is reloaded below, so eax is a clean table index.
xor eax, eax
|
|
lea ecx, nfpk_mov4
|
|
; edx = address of the byte to patch inside the first template instruction.
lea edx, byte ptr ds:nf10_11+1
|
|
|
|
; Group 0 patterns (bytes 4, 6) -> template rows nf10_1x, nf10_2x.
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_11-nf10_11)], bl
|
|
mov [edx+(nf10_12-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_13-nf10_11)], bl
|
|
mov [edx+(nf10_14-nf10_11)], bh
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_21-nf10_11)], bl
|
|
mov [edx+(nf10_22-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_23-nf10_11)], bl
|
|
mov [edx+(nf10_24-nf10_11)], bh
|
|
|
|
; Group 1 patterns (bytes 12, 14) -> template rows nf10_3x, nf10_4x.
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_31-nf10_11)], bl
|
|
mov [edx+(nf10_32-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_33-nf10_11)], bl
|
|
mov [edx+(nf10_34-nf10_11)], bh
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_41-nf10_11)], bl
|
|
mov [edx+(nf10_42-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_43-nf10_11)], bl
|
|
mov [edx+(nf10_44-nf10_11)], bh
|
|
|
|
; Re-base the patch pointer on nf10_51 for the second half of the template.
lea edx, [edx+(nf10_51-nf10_11)]
|
|
|
|
; Group 2 patterns (bytes 20, 22) -> template rows nf10_5x, nf10_6x.
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_51-nf10_51)], bl
|
|
mov [edx+(nf10_52-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_53-nf10_51)], bl
|
|
mov [edx+(nf10_54-nf10_51)], bh
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_61-nf10_51)], bl
|
|
mov [edx+(nf10_62-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_63-nf10_51)], bl
|
|
mov [edx+(nf10_64-nf10_51)], bh
|
|
|
|
; Group 3 patterns (bytes 28, 30) -> template rows nf10_7x, nf10_8x.
mov al, [esi+28]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_71-nf10_51)], bl
|
|
mov [edx+(nf10_72-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_73-nf10_51)], bl
|
|
mov [edx+(nf10_74-nf10_51)], bh
|
|
|
|
mov al, [esi+30]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_81-nf10_51)], bl
|
|
mov [edx+(nf10_82-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_83-nf10_51)], bl
|
|
mov [edx+(nf10_84-nf10_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
; edx = destination row pitch for the template below.
mov edx, nf_width
|
|
; The jump is taken so the CPU does not execute stale, already-prefetched
; copies of the instructions that were just rewritten.
jmp nf10_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf10_0:
|
|
; --- patched template: column at edi, rows 1-2 (group 0 colours) ---
nf10_11:mov al, bl
|
|
nf10_12:mov ah, bl
|
|
shl eax, 16
|
|
nf10_13:mov al, bl
|
|
nf10_14:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_21:mov al, bl
|
|
nf10_22:mov ah, bl
|
|
shl eax, 16
|
|
nf10_23:mov al, bl
|
|
nf10_24:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+8]
|
|
mov cx, [esi+10]
|
|
|
|
; --- rows 3-4 of the same column (group 1 colours) ---
nf10_31:mov al, bl
|
|
nf10_32:mov ah, bl
|
|
shl eax, 16
|
|
nf10_33:mov al, bl
|
|
nf10_34:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_41:mov al, bl
|
|
nf10_42:mov ah, bl
|
|
shl eax, 16
|
|
nf10_43:mov al, bl
|
|
nf10_44:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
; Step back up the four rows just written and 4 bytes to the right.
lea eax, [edx*4-4]
|
|
sub edi, eax
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+16]
|
|
mov cx, [esi+18]
|
|
|
|
; --- column at edi+4, rows 1-2 (group 2 colours) ---
nf10_51:mov al, bl
|
|
nf10_52:mov ah, bl
|
|
shl eax, 16
|
|
nf10_53:mov al, bl
|
|
nf10_54:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_61:mov al, bl
|
|
nf10_62:mov ah, bl
|
|
shl eax, 16
|
|
nf10_63:mov al, bl
|
|
nf10_64:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+24]
|
|
mov cx, [esi+26]
|
|
|
|
; --- rows 3-4 of the second column (group 3 colours) ---
nf10_71:mov al, bl
|
|
nf10_72:mov ah, bl
|
|
shl eax, 16
|
|
nf10_73:mov al, bl
|
|
nf10_74:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_81:mov al, bl
|
|
nf10_82:mov ah, bl
|
|
shl eax, 16
|
|
nf10_83:mov al, bl
|
|
nf10_84:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
; Consume the 32 source bytes; undo the +4 column offset before the common
; nfpk_back_right adjustment used by every handler.
add esi, 32
|
|
sub edi, 4
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+16
|
|
; nf26: handler for "2x1 4x8x2" data (24 source bytes), built on self-modifying code.
; Reached from nf10.  If source byte 12 is above byte 13 (unsigned) the data is
; handed on to nf42.  Otherwise the source is read as two 12-byte groups
; (offsets 0 and 12): four colour bytes followed by eight pattern bytes, of
; which only those at group offsets +4, +6, +8 and +10 are used here.
; Each used pattern byte indexes nfpk_mov4 and the four bytes of the entry are
; stored over the second byte of four template "mov al/ah, bl" instructions
; (presumably register-select bytes choosing bl, bh, cl or ch -- table not
; visible here).
; Output: four 4-byte rows at edi (group 0), then four at edi+4 (group 1).
; In:  esi = source data, edi = destination.
; Out: esi += 24, edi -= nfpk_back_right.  Clobbers eax, ebx, ecx, edx, flags.
nf26: ; 2x1 4x8x2 (24 bytes)
|
|
|
|
mov ax, [esi+12]
|
|
cmp al, ah
|
|
; Colour-order test on the second group selects the alternate layout.
ja nf42
|
|
|
|
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 24
|
|
jmp nf_solid
|
|
endif
|
|
|
|
; eax is zeroed once; only al is reloaded below, so eax is a clean table index.
xor eax, eax
|
|
lea ecx, nfpk_mov4
|
|
; edx = address of the byte to patch inside the first template instruction.
lea edx, byte ptr ds:nf26_11+1
|
|
|
|
; Group 0 patterns (bytes 4, 6, 8, 10) -> template rows nf26_1x..nf26_4x.
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_11-nf26_11)], bl
|
|
mov [edx+(nf26_12-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_13-nf26_11)], bl
|
|
mov [edx+(nf26_14-nf26_11)], bh
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_21-nf26_11)], bl
|
|
mov [edx+(nf26_22-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_23-nf26_11)], bl
|
|
mov [edx+(nf26_24-nf26_11)], bh
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_31-nf26_11)], bl
|
|
mov [edx+(nf26_32-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_33-nf26_11)], bl
|
|
mov [edx+(nf26_34-nf26_11)], bh
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_41-nf26_11)], bl
|
|
mov [edx+(nf26_42-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_43-nf26_11)], bl
|
|
mov [edx+(nf26_44-nf26_11)], bh
|
|
|
|
; Re-base the patch pointer on nf26_51 for the second half of the template.
lea edx, [edx+(nf26_51-nf26_11)]
|
|
|
|
; Group 1 patterns (bytes 16, 18, 20, 22) -> template rows nf26_5x..nf26_8x.
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_51-nf26_51)], bl
|
|
mov [edx+(nf26_52-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_53-nf26_51)], bl
|
|
mov [edx+(nf26_54-nf26_51)], bh
|
|
|
|
mov al, [esi+18]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_61-nf26_51)], bl
|
|
mov [edx+(nf26_62-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_63-nf26_51)], bl
|
|
mov [edx+(nf26_64-nf26_51)], bh
|
|
|
|
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_71-nf26_51)], bl
|
|
mov [edx+(nf26_72-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_73-nf26_51)], bl
|
|
mov [edx+(nf26_74-nf26_51)], bh
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_81-nf26_51)], bl
|
|
mov [edx+(nf26_82-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_83-nf26_51)], bl
|
|
mov [edx+(nf26_84-nf26_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
; edx = destination row pitch for the template below.
mov edx, nf_width
|
|
; The jump is taken so the CPU does not execute stale, already-prefetched
; copies of the instructions that were just rewritten.
jmp nf26_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf26_0:
|
|
; --- patched template: column at edi, four rows (group 0 colours) ---
nf26_11:mov al, bl
|
|
nf26_12:mov ah, bl
|
|
shl eax, 16
|
|
nf26_13:mov al, bl
|
|
nf26_14:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_21:mov al, bl
|
|
nf26_22:mov ah, bl
|
|
shl eax, 16
|
|
nf26_23:mov al, bl
|
|
nf26_24:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_31:mov al, bl
|
|
nf26_32:mov ah, bl
|
|
shl eax, 16
|
|
nf26_33:mov al, bl
|
|
nf26_34:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_41:mov al, bl
|
|
nf26_42:mov ah, bl
|
|
shl eax, 16
|
|
nf26_43:mov al, bl
|
|
nf26_44:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
; Step back up the four rows just written and 4 bytes to the right.
lea eax, [edx*4-4]
|
|
sub edi, eax
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+12]
|
|
mov cx, [esi+14]
|
|
|
|
; --- column at edi+4, four rows (group 1 colours) ---
nf26_51:mov al, bl
|
|
nf26_52:mov ah, bl
|
|
shl eax, 16
|
|
nf26_53:mov al, bl
|
|
nf26_54:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_61:mov al, bl
|
|
nf26_62:mov ah, bl
|
|
shl eax, 16
|
|
nf26_63:mov al, bl
|
|
nf26_64:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_71:mov al, bl
|
|
nf26_72:mov ah, bl
|
|
shl eax, 16
|
|
nf26_73:mov al, bl
|
|
nf26_74:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_81:mov al, bl
|
|
nf26_82:mov ah, bl
|
|
shl eax, 16
|
|
nf26_83:mov al, bl
|
|
nf26_84:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
; Consume the 24 source bytes; undo the +4 column offset before the common
; nfpk_back_right adjustment used by every handler.
add esi, 24
|
|
sub edi, 4
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+32
|
|
; nf42: handler for "1x2 8x4x2" data (24 source bytes), built on self-modifying code.
; Reached from nf26.  The source is read as two 12-byte groups (offsets 0 and
; 12): four colour bytes followed by eight pattern bytes, of which only those at
; group offsets +4, +5, +8 and +9 are used here.  Each used pattern byte indexes
; nfpk_mov4 and the four bytes of the entry are stored over the second byte of
; four template "mov al/ah, bl" instructions (presumably register-select bytes
; choosing bl, bh, cl or ch -- table not visible here).
; Output: four 8-byte rows; rows 1-2 use group 0 colours, rows 3-4 group 1.
; In:  esi = source data, edi = destination.
; Out: esi += 24, edi -= nfpk_back_right.  Clobbers eax, ebx, ecx, edx, flags.
nf42: ; 1x2 8x4x2 (24 bytes)
|
|
|
|
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 24
|
|
jmp nf_solid
|
|
endif
|
|
; eax is zeroed once; only al is reloaded below, so eax is a clean table index.
xor eax, eax
|
|
lea ecx, nfpk_mov4
|
|
; edx = address of the byte to patch inside the first template instruction.
lea edx, byte ptr ds:nf42_11+1
|
|
|
|
; Group 0, pattern bytes 4 and 5 -> template row nf42_11..18.
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_11-nf42_11)], bl
|
|
mov [edx+(nf42_12-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_13-nf42_11)], bl
|
|
mov [edx+(nf42_14-nf42_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_15-nf42_11)], bl
|
|
mov [edx+(nf42_16-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_17-nf42_11)], bl
|
|
mov [edx+(nf42_18-nf42_11)], bh
|
|
|
|
|
|
; Group 0, pattern bytes 8 and 9 -> template row nf42_31..38.
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_31-nf42_11)], bl
|
|
mov [edx+(nf42_32-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_33-nf42_11)], bl
|
|
mov [edx+(nf42_34-nf42_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_35-nf42_11)], bl
|
|
mov [edx+(nf42_36-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_37-nf42_11)], bl
|
|
mov [edx+(nf42_38-nf42_11)], bh
|
|
|
|
; Re-base the patch pointer on nf42_51 for the second half of the template.
lea edx, [edx+(nf42_51-nf42_11)]
|
|
|
|
; Group 1, pattern bytes 16 and 17 -> template row nf42_51..58.
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_51-nf42_51)], bl
|
|
mov [edx+(nf42_52-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_53-nf42_51)], bl
|
|
mov [edx+(nf42_54-nf42_51)], bh
|
|
|
|
mov al, [esi+17]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_55-nf42_51)], bl
|
|
mov [edx+(nf42_56-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_57-nf42_51)], bl
|
|
mov [edx+(nf42_58-nf42_51)], bh
|
|
|
|
|
|
; Group 1, pattern bytes 20 and 21 -> template row nf42_71..78.
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_71-nf42_51)], bl
|
|
mov [edx+(nf42_72-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_73-nf42_51)], bl
|
|
mov [edx+(nf42_74-nf42_51)], bh
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_75-nf42_51)], bl
|
|
mov [edx+(nf42_76-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_77-nf42_51)], bl
|
|
mov [edx+(nf42_78-nf42_51)], bh
|
|
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
; edx = destination row pitch for the template below.
mov edx, nf_width
|
|
; The jump is taken so the CPU does not execute stale, already-prefetched
; copies of the instructions that were just rewritten.
jmp nf42_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf42_0:
|
|
; --- patched template, row 1 (group 0 colours) ---
nf42_11:mov al, bl
|
|
nf42_12:mov ah, bl
|
|
shl eax, 16
|
|
nf42_13:mov al, bl
|
|
nf42_14:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_15:mov al, bl
|
|
nf42_16:mov ah, bl
|
|
shl eax, 16
|
|
nf42_17:mov al, bl
|
|
nf42_18:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; --- row 2 (group 0 colours) ---
nf42_31:mov al, bl
|
|
nf42_32:mov ah, bl
|
|
shl eax, 16
|
|
nf42_33:mov al, bl
|
|
nf42_34:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_35:mov al, bl
|
|
nf42_36:mov ah, bl
|
|
shl eax, 16
|
|
nf42_37:mov al, bl
|
|
nf42_38:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+12]
|
|
mov cx, [esi+14]
|
|
|
|
; --- row 3 (group 1 colours) ---
nf42_51:mov al, bl
|
|
nf42_52:mov ah, bl
|
|
shl eax, 16
|
|
nf42_53:mov al, bl
|
|
nf42_54:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_55:mov al, bl
|
|
nf42_56:mov ah, bl
|
|
shl eax, 16
|
|
nf42_57:mov al, bl
|
|
nf42_58:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; --- row 4 (group 1 colours; no pitch advance afterwards) ---
nf42_71:mov al, bl
|
|
nf42_72:mov ah, bl
|
|
shl eax, 16
|
|
nf42_73:mov al, bl
|
|
nf42_74:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_75:mov al, bl
|
|
nf42_76:mov ah, bl
|
|
shl eax, 16
|
|
nf42_77:mov al, bl
|
|
nf42_78:mov ah, bl
|
|
mov [edi+4], eax
|
|
|
|
; Consume the 24 source bytes and reposition edi for the next square.
add esi, 24
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf11:                                   ; 8x8x8 (64 bytes)
; Raw square: the source holds 64 bytes, consumed in full.  Only source bytes
; 0-7, 16-23, 32-39 and 48-55 (every other 8-byte group) are copied, to four
; consecutive destination rows nf_width bytes apart.
; In:  esi = source data, edi = destination.
; Out: esi += 64, edi -= nfpk_back_right.  Clobbers eax, edx, flags.
if 0 ;debug
        add     esi, 64
        mov     eax, 0fefefefeH
;       mov     ebx, eax
        mov     ebx, 0
        jmp     nf_solid
endif
        mov     edx, nf_width           ; edx = destination row pitch

nf11_src = 0                            ; assembly-time source offset: 0,16,32,48
REPEAT 4
        mov     eax, [esi+nf11_src]
        mov     [edi], eax
        mov     eax, [esi+nf11_src+4]
        mov     [edi+4], eax
    IF nf11_src LT 48
        add     edi, edx                ; advance a row, except after the last one
    ENDIF
nf11_src = nf11_src + 16
ENDM

        add     esi, 64
        sub     edi, nfpk_back_right    ; reposition edi for the next square
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf12:                                   ; low 4x4x8 (16 bytes)
; Low-resolution raw square: 16 source bytes, four per output row.  Every
; source byte is written twice side by side, giving four 8-byte rows that are
; nf_width bytes apart.
; In:  esi = source data, edi = destination.
; Out: esi += 16, edi -= nfpk_back_right.  Clobbers eax, ebx, edx, flags.
        mov     edx, nf_width           ; edx = destination row pitch

nf12_src = 0                            ; assembly-time source offset: 0,4,8,12
REPEAT 4
        mov     eax, [esi+nf12_src]     ; al,ah = pixels 0,1; upper half = pixels 2,3

        mov     bl, ah                  ; ebx = p1:p1:p0:p0  ->  memory p0 p0 p1 p1
        mov     bh, ah
        shl     ebx, 16
        mov     bl, al
        mov     bh, al
        mov     [edi], ebx

        shr     eax, 16                 ; al,ah = pixels 2,3
        mov     bl, ah                  ; ebx = p3:p3:p2:p2  ->  memory p2 p2 p3 p3
        mov     bh, ah
        shl     ebx, 16
        mov     bl, al
        mov     bh, al
        mov     [edi+4], ebx
    IF nf12_src LT 12
        add     edi, edx                ; advance a row, except after the last one
    ENDIF
nf12_src = nf12_src + 4
ENDM

        sub     edi, nfpk_back_right    ; reposition edi for the next square
        add     esi, 16
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf13:                                   ; 2x2 4x4x0 (4 bytes)
; Four source bytes, one colour per quadrant.  Bytes 0 and 1 fill the left and
; right 4-byte halves of the first two rows; bytes 2 and 3 do the same for the
; last two rows.  Rows are nf_width bytes apart.
; In:  esi = source data, edi = destination.
; Out: esi += 4, edi -= nfpk_back_right.  Clobbers eax, ebx, ecx, edx, flags.

; NF13_SPLAT: dst32 = source byte [esi+srcoff] replicated into all four bytes.
; dst16 must be the low word of dst32.  Uses cl/ch as scratch.
NF13_SPLAT MACRO dst32:REQ, dst16:REQ, srcoff:REQ
        mov     cl, [esi+srcoff]
        mov     ch, cl
        mov     dst32, ecx
        shl     dst32, 16
        mov     dst16, cx
ENDM

        mov     edx, nf_width           ; edx = destination row pitch

        ; rows 1 and 2
        NF13_SPLAT eax, ax, 0
        NF13_SPLAT ebx, bx, 1
        mov     [edi], eax
        mov     [edi+4], ebx
        mov     [edi+edx], eax
        mov     [edi+edx+4], ebx
        lea     edi, [edi+edx*2]

        ; rows 3 and 4
        NF13_SPLAT eax, ax, 2
        NF13_SPLAT ebx, bx, 3
        mov     [edi], eax
        mov     [edi+4], ebx
        add     edi, edx
        mov     [edi], eax
        mov     [edi+4], ebx

        sub     edi, nfpk_back_right    ; reposition edi for the next square
        add     esi, 4

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf14:                                   ; 8x8x0 (1 byte)
; Solid square: reads one colour byte, replicates it into all four bytes of
; both eax and ebx, and tail-jumps to nf_solid, which stores the rows and
; performs the return.  The retn that used to follow the unconditional jmp
; could never execute and has been removed.
; In:  esi = source data, edi = destination.
; Out: esi += 1; edi is adjusted by nf_solid.  Clobbers eax, ebx (plus whatever
;      nf_solid clobbers).
        mov     bl, [esi]               ; Copy color into 8 positions
        inc     esi
        mov     bh, bl
        mov     eax, ebx                ; upper half of ebx is stale, shifted out below
        shl     eax, 16
        mov     ax, bx                  ; eax = colour x4
        mov     ebx, eax                ; ebx = colour x4
if 0 ;debug
        mov     eax, 080808080h
        mov     ebx, eax
endif
        jmp     nf_solid                ; nf_solid returns to the dispatcher
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf15: two-colour checkerboard square (2 source bytes).  Builds eax = the
; colour pair repeated twice and ebx = the same dword rotated by one byte (the
; pair in swapped order), then falls through into nf_solid.
; In:  esi = source data, edi = destination.  Out: esi += 2.
nf15: ; mix 8x8x0 (2 bytes)
|
|
mov bx, [esi] ; Copy 2 colors into 8 positions
|
|
add esi, 2 ; in a checkerboard
|
|
mov ax, bx
|
|
shl eax, 16
|
|
mov ax, bx
|
|
mov ebx, eax
|
|
; Rotate by one byte so alternate rows start with the other colour.
rol ebx, 8
|
|
if 0 ;debug
|
|
mov eax, 080808080h
|
|
mov ebx, eax
|
|
endif
|
|
; nf_solid: shared tail, also jumped to from other handlers.
; In:  eax = dword written twice across rows 1 and 3,
;      ebx = dword written twice across rows 2 and 4, edi = destination.
; Out: four 8-byte rows written nf_width apart; edi -= nfpk_back_right.
; Clobbers edx, flags.  Does not touch esi.
nf_solid:
|
|
mov edx, nf_width
|
|
|
|
mov [edi], eax
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
add edi, edx
|
|
mov [edi], eax
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
nfPkDecompH ENDP
|
|
|
|
if DECOMPD
|
|
|
|
; Half vertical resolution version (dither between lines)
|
|
;
|
|
; nfPkDecompD: entry point and opcode dispatch loop of the half vertical
; resolution decoder.
;   ops  -> opcode stream; each byte carries two 4-bit opcodes (low nibble is
;           executed first, then the high nibble)
;   comp -> operand data consumed by the opcode handlers (kept in esi)
;   w    -  squares per row (w/2 opcode bytes are fetched per row)
;   h    -  number of rows of squares
;   x, y -  not referenced in the code visible here (NF_DECOMP_INIT is defined
;           elsewhere and may use them -- TODO confirm)
; Handlers are entered with esi = operand pointer and edi = destination of the
; current square, and leave through retn.
nfPkDecompD PROC USES ESI EDI EBX, \
|
|
ops:PTRBYTE, comp:PTRBYTE, \
|
|
x:DWORD, y:DWORD, w:DWORD, h:DWORD
|
|
LOCAL tbuf: PTRBYTE
|
|
LOCAL new_row:DWORD
|
|
LOCAL DiffBufPtrs:DWORD
|
|
|
|
LOCAL nfpk_back_right: DWORD
|
|
LOCAL wcnt:DWORD
|
|
|
|
LOG_LABEL "StartPkDecomp"
|
|
|
|
.data
|
|
; Handler address table, indexed by the 4-bit opcode.
nfpk_OpTblD label dword
|
|
dword offset nf0 ; Prev Same (0)
|
|
dword offset nf1 ; No change (and copied to screen) (0)
|
|
dword offset nf2 ; Near shift from older part of current buf (1)
|
|
dword offset nf3 ; Near shift from newer part of current buf (1)
|
|
dword offset nf4 ; Near shift from previous buffer (1)
|
|
dword offset nf5 ; Far shift from previous buffer (2)
|
|
dword offset nf6 ; Far shift from current buffer (2)
|
|
; [Or if COMPOPS, run of no changes (0)]
|
|
dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
|
|
dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
|
|
dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
|
|
; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
|
|
dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 8x4x2 (24 bytes)
|
|
dword offset nf11 ; 8x8x8 (64 bytes)
|
|
dword offset nf12 ; low 4x4x8 (16 bytes)
|
|
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
|
|
dword offset nf14 ; 8x8x0 (1 byte)
|
|
dword offset nf15 ; mix 8x8x0 (2 bytes)
|
|
.code
|
|
|
|
ifdef SYMANTEC
|
|
mov ebx, ds ; Allow DS to access code
|
|
mov ecx, 0
|
|
mov ax, 3505h
|
|
int 21h
|
|
endif
|
|
|
|
; Macro defined elsewhere in the file; presumably initialises tbuf,
; DiffBufPtrs and the nf_new_* values used below -- TODO confirm.
NF_DECOMP_INIT 0
|
|
|
|
; new_row = 4*nf_width - nf_new_w: added to edi at the end of each row of
; squares (each square occupies four lines in this decoder).
mov eax, nf_width
|
|
shl eax, 2
|
|
sub eax, nf_new_w
|
|
mov new_row, eax
|
|
|
|
shr nf_new_h, 1
|
|
|
|
; nfpk_back_right = 3*nf_width - SWIDTH.  A handler that has moved edi down
; three lines subtracts this to land on the first line of the next square.
mov eax, nf_width
|
|
lea eax, [eax*2+eax-SWIDTH]
|
|
mov nfpk_back_right, eax
|
|
|
|
mov esi, comp
|
|
mov edi, tbuf
|
|
; Start of a row of squares: wcnt = number of opcode bytes (square pairs).
nf_StartRow:
|
|
mov eax, w
|
|
shr eax, 1
|
|
mov wcnt,eax
|
|
ALIGN 4
|
|
nf_NextPair:
|
|
dec wcnt
|
|
js nf_NextRow
|
|
; Fetch the next opcode byte and advance the ops pointer.
mov ebx, ops
|
|
mov al, [ebx]
|
|
inc ebx
|
|
mov ops, ebx
|
|
|
|
; ebx = high nibble, eax = low nibble.
xor ebx, ebx
|
|
mov bl, al
|
|
shr bl, 4
|
|
and eax, 0Fh
|
|
; Chain the two handlers through the stack: the low-nibble handler is
; jumped to, its retn enters the high-nibble handler, and that handler's
; retn comes back to nf_NextPair.
push offset nf_NextPair
|
|
push nfpk_OpTblD[ebx*4]
|
|
jmp nfpk_OpTblD[eax*4]
|
|
|
|
nf_NextRow:
|
|
add edi, new_row
|
|
dec h
|
|
jnz nf_StartRow
|
|
LOG_LABEL "EndPkDecomp"
|
|
|
|
ifdef SYMANTEC
|
|
mov ebx, ds ; Disable DS from accessing code
|
|
mov ecx, offset DGROUP:_data_bottom[-1]
|
|
mov ax, 3505h
|
|
int 21h
|
|
endif
|
; Procedure-level return (the opcode handlers below use retn instead).
ret
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf0: copy the square found at the same position plus DiffBufPtrs.
; Enters nf_shiftr directly (no saved flags to pop) with eax = source offset
; relative to edi.
nf0: ; No change from previous buffer
|
|
mov eax, DiffBufPtrs
|
|
jmp nf_shiftr
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf1: leave the square untouched; only step edi one square to the right.
nf1: ; No change (and copied to screen)
|
|
add edi, SWIDTH
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf2: one operand byte indexes the word table nfpk_ShiftP2; the entry is
; decoded by nf_xyc_shift below (al = first component, ah = second component).
nf2: ; Near shift from older part of current buffer
|
|
xor eax, eax
|
|
mov al, [esi]
|
|
inc esi
|
|
mov ax, nfpk_ShiftP2[eax*2]
|
|
; nf_xyc_shift: shared tail (also used by nf3 and the non-COMPOPS nf6).
; In:  al = signed byte offset, ah = second component.
; Out: eax = sign-extended al + nfpk_ShiftY[ah >> 1]; the bit shifted out of
;      ah is pushed (as CF, via pushf) for nf_shift, which uses it to choose
;      between the plain and the dithered copy.
nf_xyc_shift:
|
|
xor ebx, ebx
|
|
mov bl, ah
|
|
; Sign-extend al into eax.
shl eax, 24
|
|
sar eax, 24
|
|
; CF = low bit of the second component.
sar bl, 1
|
|
; Save CF: the add below overwrites the flags.
pushf
|
|
add eax, nfpk_ShiftY[ebx*4]
|
|
jmp nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf3: same table lookup as nf2, but both components of the nfpk_ShiftP2
; entry are negated before being decoded by nf_xyc_shift.
nf3: ; Near shift from newer part of current buffer
|
|
xor eax, eax
|
|
mov al, [esi]
|
|
inc esi
|
|
mov ax, nfpk_ShiftP2[eax*2]
|
|
neg al
|
|
neg ah
|
|
jmp nf_xyc_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf4: one operand byte indexes the word table nfpk_ShiftP1; the entry is
; decoded by nf_xyp_shift, which also adds DiffBufPtrs to the offset.
nf4: ; Near shift from previous buffer
|
|
xor eax, eax
|
|
mov al, [esi]
|
|
inc esi
|
|
mov ax, nfpk_ShiftP1[eax*2]
|
|
jmp nf_xyp_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf5: the two components are taken directly from two operand bytes
; (al = first, ah = second) and decoded by nf_xyp_shift below.
nf5: ; Far shift from previous buffer
|
|
mov ax, [esi]
|
|
add esi, 2
|
|
; nf_xyp_shift: shared tail (also used by nf4).
; In:  al = signed byte offset, ah = second component.
; Out: eax = sign-extended al + nfpk_ShiftY[ah >> 1] + DiffBufPtrs; the bit
;      shifted out of ah is pushed (as CF, via pushf) for nf_shift.
nf_xyp_shift:
|
|
xor ebx, ebx
|
|
mov bl, ah
|
|
; Sign-extend al into eax.
shl eax, 24
|
|
sar eax, 24
|
|
; CF = low bit of the second component.
sar bl, 1
|
|
; Save CF: the adds below overwrite the flags.
pushf
|
|
add eax, nfpk_ShiftY[ebx*4]
|
|
add eax, DiffBufPtrs
|
|
jmp nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
|
|
; nf6 has two alternative definitions selected at assembly time by COMPOPS.
if COMPOPS
|
|
|
|
; COMPOPS variant: skip a run of unchanged squares.
; In:  ebx = high nibble k of the current opcode byte (left there by the
;      dispatcher), edi = destination of the current square; the dispatcher
;      has pushed the high-nibble handler address on top of its own return
;      address.
; Out: edi advanced past k+2 square pairs, with wcnt and h updated whenever
;      the run crosses the end of a row.  Clobbers eax, ebx, flags.
nf6: ; Run of no changes (must only appear in first nibble opcodes)
|
|
; Next nibble k specifies 2k+4 squares with no changes
|
|
add esp, 4 ; Next nibble is not an opcode
|
|
add ebx, 2 ; (minimum of 4 squares)
|
|
ALIGN 4
|
|
nf6a: add edi, SWIDTH*2 ; Advance over two squares
|
|
dec ebx
|
|
jz nf6z ; Last pair of squares
|
|
dec wcnt ; Same row?
|
|
jns nf6a ; Yes
|
|
add edi, new_row ; Advance to next row
|
|
dec h ; Decrement row count (should never become zero here)
|
|
mov eax, w ; Reset wcnt
|
|
shr eax ,1
|
|
; The pair about to be skipped at nf6a is already counted.
dec eax
|
|
mov wcnt, eax
|
|
jmp nf6a
|
|
|
|
; Returns to nf_NextPair, because the high-nibble handler address was discarded.
nf6z: retn
|
|
|
|
else
|
|
|
|
; Non-COMPOPS variant: two operand bytes supply the components directly
; (al = first, ah = second); decoded by nf_xyc_shift.
nf6: ; Far shift from current buffer
|
|
mov ax, [esi]
|
|
add esi, 2
|
|
jmp nf_xyc_shift
|
|
|
|
endif
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf_shift: common tail of the shift opcodes.
; In:  eax = byte offset of the source square relative to edi; the top of the
;      stack holds the flags pushed by nf_xyc_shift / nf_xyp_shift.
; CF set in the saved flags selects the dithered copy (nf_shiftd); otherwise
; the plain copy nf_shiftr is used.
nf_shift:
|
|
popf
|
|
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, eax
|
|
jmp nf_solid
|
|
endif
|
|
jc nf_shiftd
|
|
|
|
; nf_shiftr: copy four 8-byte rows from edi+eax to edi, both advancing by
; nf_width per row.  Also entered directly from nf0.
; Out: edi -= nfpk_back_right; esi preserved (parked in ebx).
; Clobbers eax, ebx, edx, flags.
nf_shiftr:
|
|
mov ebx, esi ; save esi
|
|
lea esi, [edi+eax]
|
|
mov edx, nf_width
|
|
|
|
; First three rows, each followed by a row advance.
REPEAT 3
|
|
mov eax, [esi]
|
|
mov [edi], eax
|
|
mov eax, [esi+4]
|
|
mov [edi+4], eax
|
|
add esi, edx
|
|
add edi, edx
|
|
ENDM
|
|
; Fourth row, without the row advance.
mov eax, [esi]
|
|
mov [edi], eax
|
|
mov eax, [esi+4]
|
|
mov [edi+4], eax
|
|
|
|
; nfpk_back_right was set to 3*nf_width - SWIDTH by the procedure prologue,
; so this moves edi back up three rows and one square to the right.
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
mov esi, ebx ; restore esi
|
|
retn
|
|
|
|
; nf_shiftd: dithered variant of nf_shiftr.  For each of the four output rows
; the stored dword takes bytes 0 and 2 from the source line below
; ([esi+nf_width]) and bytes 1 and 3 from the source line itself ([esi]),
; using the mask 00ff00ffh.
; In:  eax = byte offset of the source square relative to edi.
; Out: edi -= nfpk_back_right; esi preserved (push/pop).
; Clobbers eax, ebx, ecx, edx, flags.
nf_shiftd:
|
|
push esi
|
|
lea esi, [edi+eax]
|
|
mov edx, nf_width
|
|
; Mask of the byte lanes taken from the line below.
mov ebx, 000ff00ffH
|
|
|
|
; First three rows, each followed by a row advance.
REPEAT 3
|
|
; eax = (cur AND NOT mask) OR (below AND mask), computed as cur ^ ((cur ^ below) & mask).
mov eax, [esi]
|
|
mov ecx, eax
|
|
xor ecx, [esi+edx]
|
|
and ecx, ebx
|
|
xor eax, ecx
|
|
mov [edi], eax
|
|
mov eax, [esi+4]
|
|
mov ecx, eax
|
|
xor ecx, [esi+edx+4]
|
|
and ecx, ebx
|
|
xor eax, ecx
|
|
mov [edi+4], eax
|
|
add esi, edx
|
|
add edi, edx
|
|
ENDM
|
|
; Fourth row, without the row advance.
mov eax, [esi]
|
|
mov ecx, eax
|
|
xor ecx, [esi+edx]
|
|
and ecx, ebx
|
|
xor eax, ecx
|
|
mov [edi], eax
|
|
mov eax, [esi+4]
|
|
mov ecx, eax
|
|
xor ecx, [esi+edx+4]
|
|
and ecx, ebx
|
|
xor eax, ecx
|
|
mov [edi+4], eax
|
|
|
|
; nfpk_back_right was set to 3*nf_width - SWIDTH by the procedure prologue,
; so this moves edi back up three rows and one square to the right.
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
pop esi
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf7: handler for "8x8x1" data (10 source bytes), built on self-modifying code.
; If source byte 0 is above byte 1 (unsigned) the data is handed to nf23.
; Otherwise bytes 0-1 are two colours and bytes 2-9 are pattern bytes taken in
; pairs.  Each pair is merged into one byte -- bits selected by 0aah come from
; the second byte, the remaining bits from the first -- which indexes the dword
; table nfpk_mov8.  The four bytes of that entry are stored at offset 2 of four
; template "mov ax, bx" instructions (presumably register-select bytes choosing
; bx, dx, cx or bp -- the table is not visible here).
; Output: four 8-byte rows at edi, nf_width apart.
; In:  esi = source data, edi = destination.
; Out: esi += 10, edi -= nfpk_back_right.  Clobbers eax, ebx, ecx, edx, flags.
nf7: ; 8x8x1 (10 bytes)
|
|
|
|
mov ax, [esi]
|
|
cmp al, ah
|
|
; Colour-order test selects the alternate layout.
ja nf23
|
|
|
|
if 0 ;debug
|
|
add esi, 10
|
|
mov eax, 0fefefefeH
|
|
mov ebx, eax
|
|
jmp nf_solid
|
|
endif
|
|
; eax is zeroed once; only al is written below, so eax is a clean table index.
xor eax, eax
|
|
lea ecx, nfpk_mov8
|
|
; edx = address of the byte to patch inside the first template instruction.
lea edx, byte ptr ds:nf7_11+2
|
|
|
|
; Pattern bytes 2/3 -> template row nf7_11..14.
; al = ([esi+2] AND 55h) OR ([esi+3] AND 0aah)
mov al, [esi+2]
|
|
mov bl, al
|
|
xor bl, [esi+3]
|
|
and bl, 0aaH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_11-nf7_11)], bl
|
|
mov [edx+(nf7_12-nf7_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_13-nf7_11)], bl
|
|
mov [edx+(nf7_14-nf7_11)], bh
|
|
|
|
; Pattern bytes 4/5 -> template row nf7_31..34.
mov al, [esi+4]
|
|
mov bl, al
|
|
xor bl, [esi+5]
|
|
and bl, 0aaH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_31-nf7_11)], bl
|
|
mov [edx+(nf7_32-nf7_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_33-nf7_11)], bl
|
|
mov [edx+(nf7_34-nf7_11)], bh
|
|
|
|
; Re-base the patch pointer on nf7_51 for the second half of the template.
lea edx, [edx+(nf7_51-nf7_11)]
|
|
|
|
; Pattern bytes 6/7 -> template row nf7_51..54.
mov al, [esi+6]
|
|
mov bl, al
|
|
xor bl, [esi+7]
|
|
and bl, 0aaH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_51-nf7_51)], bl
|
|
mov [edx+(nf7_52-nf7_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_53-nf7_51)], bl
|
|
mov [edx+(nf7_54-nf7_51)], bh
|
|
|
|
; Pattern bytes 8/9 -> template row nf7_71..74.
mov al, [esi+8]
|
|
mov bl, al
|
|
xor bl, [esi+9]
|
|
and bl, 0aaH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_71-nf7_51)], bl
|
|
mov [edx+(nf7_72-nf7_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_73-nf7_51)], bl
|
|
mov [edx+(nf7_74-nf7_51)], bh
|
|
|
|
; ebp and esi are borrowed as data registers until the pops below; the
; procedure's LOCALs (e.g. nfpk_back_right) are only referenced again after
; ebp has been restored.
push ebp
|
|
push esi
|
|
; load bx,dx,cx,bp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
mov cx, [esi]
|
|
; esi = destination row pitch for the template below.
mov esi,nf_width
|
|
mov bl,cl
|
|
mov bh,cl
|
|
mov dl,ch
|
|
mov dh,cl
|
|
mov al,ch
|
|
mov ah,ch
|
|
mov ebp,eax
|
|
; The jump is taken so the CPU does not execute stale, already-prefetched
; copies of the instructions that were just rewritten.
jmp nf7_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf7_0:
|
|
; --- patched template: each "mov ax, bx" supplies two adjacent pixels ---
; Row 1
nf7_11: mov ax, bx
|
|
shl eax, 16
|
|
nf7_12: mov ax, bx
|
|
mov [edi], eax
|
|
nf7_13: mov ax, bx
|
|
shl eax, 16
|
|
nf7_14: mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; Row 2
nf7_31: mov ax, bx
|
|
shl eax, 16
|
|
nf7_32: mov ax, bx
|
|
mov [edi], eax
|
|
nf7_33: mov ax, bx
|
|
shl eax, 16
|
|
nf7_34: mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; Row 3
nf7_51: mov ax, bx
|
|
shl eax, 16
|
|
nf7_52: mov ax, bx
|
|
mov [edi], eax
|
|
nf7_53: mov ax, bx
|
|
shl eax, 16
|
|
nf7_54: mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; Row 4 (no pitch advance afterwards)
nf7_71: mov ax, bx
|
|
shl eax, 16
|
|
nf7_72: mov ax, bx
|
|
mov [edi], eax
|
|
nf7_73: mov ax, bx
|
|
shl eax, 16
|
|
nf7_74: mov ax, bx
|
|
mov [edi+4], eax
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 10
|
|
; nfpk_back_right was set to 3*nf_width - SWIDTH by the procedure prologue,
; so this moves edi back up three rows and one square to the right.
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf7+16
|
|
; nf23: handler for "low 4x4x1" data (4 source bytes), built on self-modifying code.
; Reached from nf7.  Bytes 0-1 are two colours; bytes 2-3 hold four 4-bit
; patterns (low nibble first).  Each nibble indexes the dword table nfpk_mov4l
; and the four bytes of that entry are stored at offset 2 of four template
; "mov ax, bx" instructions (presumably register-select bytes choosing bx or
; cx -- the table is not visible here).
; Output: four 8-byte rows at edi, nf_width apart.
; In:  esi = source data, edi = destination.
; Out: esi += 4, edi -= nfpk_back_right.  Clobbers eax, ebx, ecx, edx, flags.
nf23: ; low 4x4x1 (4 bytes)
|
|
|
|
; eax is zeroed once; only al is written below, so eax is a clean table index.
xor eax, eax
|
|
lea ecx, nfpk_mov4l
|
|
; edx = address of the byte to patch inside the first template instruction.
lea edx, byte ptr ds:nf23_11+2
|
|
|
|
; Low nibble of byte 2 -> template row nf23_11..14.
mov al, [esi+2]
|
|
and al, 0fH
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf23_11-nf23_11)], bl
|
|
mov [edx+(nf23_12-nf23_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf23_13-nf23_11)], bl
|
|
mov [edx+(nf23_14-nf23_11)], bh
|
|
|
|
; High nibble of byte 2 -> template row nf23_31..34.
mov al, [esi+2]
|
|
shr al, 4
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf23_31-nf23_11)], bl
|
|
mov [edx+(nf23_32-nf23_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf23_33-nf23_11)], bl
|
|
mov [edx+(nf23_34-nf23_11)], bh
|
|
|
|
|
|
; Low nibble of byte 3 -> template row nf23_51..54.
mov al, [esi+3]
|
|
and al, 0fH
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf23_51-nf23_11)], bl
|
|
mov [edx+(nf23_52-nf23_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf23_53-nf23_11)], bl
|
|
mov [edx+(nf23_54-nf23_11)], bh
|
|
|
|
; High nibble of byte 3 -> template row nf23_71..74.
mov al, [esi+3]
|
|
shr al, 4
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf23_71-nf23_11)], bl
|
|
mov [edx+(nf23_72-nf23_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf23_73-nf23_11)], bl
|
|
mov [edx+(nf23_74-nf23_11)], bh
|
|
|
|
; edx = destination row pitch for the template below.
mov edx, nf_width
|
|
|
|
; load bx,cx with 00,11 color combinations
|
|
mov bx, [esi]
|
|
mov cl, bh
|
|
mov bh, bl
|
|
mov ch, cl
|
|
|
|
; The jump is taken so the CPU does not execute stale, already-prefetched
; copies of the instructions that were just rewritten.
jmp nf23_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf23_0:
|
|
|
|
; --- patched template: each "mov ax, bx" supplies two adjacent pixels ---
; Row 1
nf23_11:mov ax, bx
|
|
shl eax, 16
|
|
nf23_12:mov ax, bx
|
|
mov [edi], eax
|
|
|
|
nf23_13:mov ax, bx
|
|
shl eax, 16
|
|
nf23_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; Row 2
nf23_31:mov ax, bx
|
|
shl eax, 16
|
|
nf23_32:mov ax, bx
|
|
mov [edi], eax
|
|
|
|
nf23_33:mov ax, bx
|
|
shl eax, 16
|
|
nf23_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; Row 3
nf23_51:mov ax, bx
|
|
shl eax, 16
|
|
nf23_52:mov ax, bx
|
|
mov [edi], eax
|
|
|
|
nf23_53:mov ax, bx
|
|
shl eax, 16
|
|
nf23_54:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; Row 4 (no pitch advance afterwards)
nf23_71:mov ax, bx
|
|
shl eax, 16
|
|
nf23_72:mov ax, bx
|
|
mov [edi], eax
|
|
|
|
nf23_73:mov ax, bx
|
|
shl eax, 16
|
|
nf23_74:mov ax, bx
|
|
mov [edi+4], eax
|
|
|
|
; nfpk_back_right was set to 3*nf_width - SWIDTH by the procedure prologue,
; so this moves edi back up three rows and one square to the right.
sub edi, nfpk_back_right
|
|
add esi, 4
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf8: handler for "2x2 4x4x1" data (16 source bytes), built on self-modifying code.
; If source byte 0 is above byte 1 (unsigned) the data is handed to nf24.
; Otherwise the source is read as four 4-byte groups (offsets 0, 4, 8, 12): two
; colour bytes followed by two pattern bytes.  In each pattern byte, bits 1 and
; 3 are replaced by bits 5 and 7 (mask 0ah applied to the byte shifted right by
; four); the result indexes the dword table nfpk_mov8, and the two low bytes of
; the entry are stored at offset 2 of two template "mov ax, bx" instructions
; (presumably register-select bytes choosing bx, dx, cx or bp -- the table is
; not visible here).
; Output: eight 4-byte stores -- four rows at edi, then four rows at edi+4.
; In:  esi = source data, edi = destination.
; Out: esi += 16, edi -= nfpk_back_right.  Clobbers eax, ebx, ecx, edx, flags.
nf8: ; 2x2 4x4x1 (16 bytes)
|
|
|
|
mov ax, [esi]
|
|
cmp al, ah
|
|
; Colour-order test selects the alternate layout.
ja nf24
|
|
|
|
; <WIP> Note: This could be made faster with a new (16 16-bit entry) table.
|
|
|
|
; eax is zeroed once; only al is written below, so eax is a clean table index.
xor eax, eax
|
|
lea ecx, nfpk_mov8
|
|
; edx = address of the byte to patch inside the first template instruction.
lea edx, byte ptr ds:nf8_11+2
|
|
|
|
; Group 0 pattern bytes (2, 3) -> template rows nf8_1x, nf8_2x.
; al = (al AND NOT 0ah) OR ((al SHR 4) AND 0ah)
mov al, [esi+2]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_11-nf8_11)], bl
|
|
mov [edx+(nf8_12-nf8_11)], bh
|
|
|
|
mov al, [esi+3]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_21-nf8_11)], bl
|
|
mov [edx+(nf8_22-nf8_11)], bh
|
|
|
|
|
|
; Group 1 pattern bytes (6, 7) -> template rows nf8_3x, nf8_4x.
mov al, [esi+6]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_31-nf8_11)], bl
|
|
mov [edx+(nf8_32-nf8_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_41-nf8_11)], bl
|
|
mov [edx+(nf8_42-nf8_11)], bh
|
|
|
|
; Re-base the patch pointer on nf8_51 for the second half of the template.
add edx, nf8_51-nf8_11
|
|
|
|
; Group 2 pattern bytes (10, 11) -> template rows nf8_5x, nf8_6x.
mov al, [esi+10]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_51-nf8_51)], bl
|
|
mov [edx+(nf8_52-nf8_51)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_61-nf8_51)], bl
|
|
mov [edx+(nf8_62-nf8_51)], bh
|
|
|
|
|
|
; Group 3 pattern bytes (14, 15) -> template rows nf8_7x, nf8_8x.
mov al, [esi+14]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_71-nf8_51)], bl
|
|
mov [edx+(nf8_72-nf8_51)], bh
|
|
|
|
mov al, [esi+15]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_81-nf8_51)], bl
|
|
mov [edx+(nf8_82-nf8_51)], bh
|
|
|
|
|
|
; ebp and esi are borrowed as data registers until the pops below; the saved
; source pointer stays reachable at [esp], and the procedure's LOCALs are only
; referenced again after ebp has been restored.
push ebp
|
|
push esi
|
|
; load bx,dx,cx,bp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
mov cx, [esi]
|
|
; esi = destination row pitch for the template below.
mov esi, nf_width
|
|
mov bl,cl
|
|
mov bh,cl
|
|
mov dl,ch
|
|
mov dh,cl
|
|
mov al,ch
|
|
mov ah,ch
|
|
mov ebp,eax
|
|
|
|
; The jump is taken so the CPU does not execute stale, already-prefetched
; copies of the instructions that were just rewritten.
jmp nf8_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf8_0:
|
|
; --- patched template: column at edi, rows 1-2 (group 0 colours) ---
nf8_11: mov ax, bx
|
|
shl eax, 16
|
|
nf8_12: mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
nf8_21: mov ax, bx
|
|
shl eax, 16
|
|
nf8_22: mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
; Reload the four colour combinations from group 1 (source bytes 4-5),
; fetching the source pointer saved on the stack.
mov eax, [esp]
|
|
mov cx, [eax+4]
|
|
mov bl,cl
|
|
mov bh,cl
|
|
mov dl,ch
|
|
mov dh,cl
|
|
mov al,ch
|
|
mov ah,ch
|
|
mov ebp,eax
|
|
|
|
; --- rows 3-4 of the same column ---
nf8_31: mov ax, bx
|
|
shl eax, 16
|
|
nf8_32: mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
nf8_41: mov ax, bx
|
|
shl eax, 16
|
|
nf8_42: mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
; Step back up the four rows just written and 4 bytes to the right.
lea eax, [esi*4-4]
|
|
sub edi, eax
|
|
|
|
; Reload the colour combinations from group 2 (source bytes 8-9).
mov eax, [esp]
|
|
mov cx, [eax+8]
|
|
mov bl,cl
|
|
mov bh,cl
|
|
mov dl,ch
|
|
mov dh,cl
|
|
mov al,ch
|
|
mov ah,ch
|
|
mov ebp,eax
|
|
|
|
; --- column at edi+4, rows 1-2 ---
nf8_51: mov ax, bx
|
|
shl eax, 16
|
|
nf8_52: mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
nf8_61: mov ax, bx
|
|
shl eax, 16
|
|
nf8_62: mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
; Reload the colour combinations from group 3 (source bytes 12-13).
mov eax, [esp]
|
|
mov cx, [eax+12]
|
|
mov bl,cl
|
|
mov bh,cl
|
|
mov dl,ch
|
|
mov dh,cl
|
|
mov al,ch
|
|
mov ah,ch
|
|
mov ebp,eax
|
|
|
|
; --- rows 3-4 of the second column ---
nf8_71: mov ax, bx
|
|
shl eax, 16
|
|
nf8_72: mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
nf8_81: mov ax, bx
|
|
shl eax, 16
|
|
nf8_82: mov ax, bx
|
|
mov [edi], eax
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 16
|
|
; Undo the +4 column offset before the common nfpk_back_right adjustment.
sub edi, 4
|
|
; nfpk_back_right was set to 3*nf_width - SWIDTH by the procedure prologue,
; so this moves edi back up three rows and one square to the right.
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+16
; nf24 -- decoder for a block stored in the "2x1 4x8x1" form (12 bytes).
;   In:   esi -> encoded block, edi -> destination.
;   Out:  esi advanced by 12; edi = entry edi + 3*nf_width - nfpk_back_right.
;   Uses: eax, ebx, ecx, edx (ebp and esi are pushed and restored).
; If byte [esi+6] is above byte [esi+7] (unsigned) the block is handed to
; nf40.  Otherwise the routine patches itself: each bitmap byte
; ([esi+2..5], [esi+8..11]) is turned into an index into nfpk_mov8 and the
; two low bytes of that table entry are stored over the third byte of the
; two "mov ax, bx" instructions that build one output dword.  The patched
; code then writes four dwords down one column, moves back up four rows
; and right four bytes, and writes four more.
; NOTE(review): the patched byte is presumably the ModRM byte, i.e. the
; patch picks which of bx/dx/cx/bp each instruction copies -- confirm
; against the definition of nfpk_mov8.
|
|
nf24: ; 2x1 4x8x1 (12 bytes)
|
|
|
|
mov ax, [esi+6]  ; al = byte [esi+6], ah = byte [esi+7]
|
|
cmp al, ah
|
|
ja nf40  ; [esi+6] > [esi+7] (unsigned): decode with nf40 instead
|
|
|
|
; <WIP> Note: This could be made faster with a new (16 16-bit entry) table.
|
|
|
|
xor eax, eax  ; upper 24 bits stay zero so eax can be used as a table index
|
|
lea ecx, nfpk_mov8
|
|
lea edx, byte ptr ds:nf24_11+2  ; edx -> third byte of the instruction at nf24_11
|
|
|
|
mov al, [esi+2]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH  ; bl = bits 1 and 3 where the high and low nibbles differ
|
|
xor al, bl  ; bits 1 and 3 of al now equal its bits 5 and 7
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_11-nf24_11)], bl  ; patch the first instruction of the row
|
|
mov [edx+(nf24_12-nf24_11)], bh  ; patch the second instruction of the row
|
|
|
|
mov al, [esi+3]  ; same nibble merge and patch for the next row
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_21-nf24_11)], bl
|
|
mov [edx+(nf24_22-nf24_11)], bh
|
|
|
|
mov al, [esi+4]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_31-nf24_11)], bl
|
|
mov [edx+(nf24_32-nf24_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_41-nf24_11)], bl
|
|
mov [edx+(nf24_42-nf24_11)], bh
|
|
|
|
add edx, nf24_51-nf24_11  ; rebase edx onto the instruction at nf24_51
|
|
|
|
mov al, [esi+8]  ; second group of bitmap bytes ([esi+8..11])
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_51-nf24_51)], bl
|
|
mov [edx+(nf24_52-nf24_51)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_61-nf24_51)], bl
|
|
mov [edx+(nf24_62-nf24_51)], bh
|
|
|
|
mov al, [esi+10]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_71-nf24_51)], bl
|
|
mov [edx+(nf24_72-nf24_51)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov bl, al
|
|
shr bl, 4
|
|
xor bl, al
|
|
and bl, 0aH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_81-nf24_51)], bl
|
|
mov [edx+(nf24_82-nf24_51)], bh
|
|
|
|
push ebp
|
|
push esi  ; saved source pointer is re-read from [esp] below
|
|
; load bx,dx,cx,bp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
mov cx, [esi]  ; cl = byte [esi], ch = byte [esi+1]
|
|
mov esi, nf_width  ; esi is the row stride from here until it is popped
|
|
mov bl,cl
|
|
mov bh,cl
|
|
mov dl,ch
|
|
mov dh,cl
|
|
mov al,ch
|
|
mov ah,ch
|
|
mov ebp,eax  ; bp = ch:ch
|
|
|
|
jmp nf24_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf24_0:
|
|
; The source operand of every labelled "mov ax, bx" below is a placeholder
; that the patch code above overwrites before this point is reached.
nf24_11:mov ax, bx
|
|
shl eax, 16
|
|
nf24_12:mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
nf24_21:mov ax, bx
|
|
shl eax, 16
|
|
nf24_22:mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
nf24_31:mov ax, bx
|
|
shl eax, 16
|
|
nf24_32:mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
nf24_41:mov ax, bx
|
|
shl eax, 16
|
|
nf24_42:mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
lea eax, [esi*4-4]  ; four rows (esi = nf_width) minus four bytes
|
|
sub edi, eax  ; back up four rows and step four bytes right
|
|
|
|
mov eax, [esp]  ; eax = source pointer saved by "push esi"
|
|
mov cx, [eax+6]  ; second pair of source bytes, [esi+6] and [esi+7]
|
|
mov bl,cl
|
|
mov bh,cl
|
|
mov dl,ch
|
|
mov dh,cl
|
|
mov al,ch
|
|
mov ah,ch
|
|
mov ebp,eax
|
|
|
|
nf24_51:mov ax, bx
|
|
shl eax, 16
|
|
nf24_52:mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
nf24_61:mov ax, bx
|
|
shl eax, 16
|
|
nf24_62:mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
nf24_71:mov ax, bx
|
|
shl eax, 16
|
|
nf24_72:mov ax, bx
|
|
mov [edi], eax
|
|
add edi, esi
|
|
|
|
nf24_81:mov ax, bx
|
|
shl eax, 16
|
|
nf24_82:mov ax, bx
|
|
mov [edi], eax
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 12  ; consume the 12 source bytes
|
|
sub edi, 4  ; undo the four-byte step to the right
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+32
; nf40 -- decoder for a block stored in the "1x2 8x4x1" form (12 bytes).
; Reached from nf24 when byte [esi+6] is above byte [esi+7].
;   In:   esi -> encoded block, edi -> destination.
;   Out:  esi advanced by 12; edi = entry edi + 3*nf_width - nfpk_back_right.
;   Uses: eax, ebx, ecx, edx (ebp and esi are pushed and restored).
; Bitmap bytes are taken in pairs ([esi+2]/[esi+3], [esi+4]/[esi+5],
; [esi+8]/[esi+9], [esi+10]/[esi+11]).  Each pair is merged into one index
; into nfpk_mov8, and the four bytes of the table entry are stored over the
; third byte of the four "mov ax, bx" instructions that build one 8-byte
; output row.  Rows 1-2 use the byte pair at [esi], rows 3-4 the pair at
; [esi+6].
; NOTE(review): the patched byte is presumably the ModRM byte of each
; instruction -- confirm against the definition of nfpk_mov8.
|
|
nf40: ; 1x2 8x4x1 (12 bytes)
|
|
xor eax, eax  ; upper 24 bits stay zero so eax can be used as a table index
|
|
lea ecx, nfpk_mov8
|
|
lea edx, byte ptr ds:nf40_11+2  ; edx -> third byte of the instruction at nf40_11
|
|
|
|
mov al, [esi+2]
|
|
mov bl, al
|
|
xor bl, [esi+3]
|
|
and bl, 0ccH  ; keep the differences in bits 2,3,6,7 only
|
|
xor al, bl  ; al = bits 0,1,4,5 of [esi+2] and bits 2,3,6,7 of [esi+3]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_11-nf40_11)], bl
|
|
mov [edx+(nf40_12-nf40_11)], bh
|
|
shr ebx, 16  ; bring the upper two table bytes into bl/bh
|
|
mov [edx+(nf40_13-nf40_11)], bl
|
|
mov [edx+(nf40_14-nf40_11)], bh
|
|
|
|
mov al, [esi+4]  ; same merge for the pair [esi+4]/[esi+5]
|
|
mov bl, al
|
|
xor bl, [esi+5]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_31-nf40_11)], bl
|
|
mov [edx+(nf40_32-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_33-nf40_11)], bl
|
|
mov [edx+(nf40_34-nf40_11)], bh
|
|
|
|
add edx, nf40_51-nf40_11  ; rebase edx onto the instruction at nf40_51
|
|
|
|
mov al, [esi+8]
|
|
mov bl, al
|
|
xor bl, [esi+9]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_51-nf40_51)], bl
|
|
mov [edx+(nf40_52-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_53-nf40_51)], bl
|
|
mov [edx+(nf40_54-nf40_51)], bh
|
|
|
|
mov al, [esi+10]
|
|
mov bl, al
|
|
xor bl, [esi+11]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_71-nf40_51)], bl
|
|
mov [edx+(nf40_72-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_73-nf40_51)], bl
|
|
mov [edx+(nf40_74-nf40_51)], bh
|
|
|
|
push ebp
|
|
push esi  ; saved source pointer is re-read from [esp] below
|
|
; load bx,dx,cx,bp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
mov cx, [esi]  ; cl = byte [esi], ch = byte [esi+1]
|
|
mov esi, nf_width  ; esi is the row stride from here until it is popped
|
|
mov bl,cl
|
|
mov bh,cl
|
|
mov dl,ch
|
|
mov dh,cl
|
|
mov al,ch
|
|
mov ah,ch
|
|
mov ebp,eax  ; bp = ch:ch
|
|
|
|
jmp nf40_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf40_0:
|
|
; The source operand of every labelled "mov ax, bx" below is a placeholder
; that the patch code above overwrites before this point is reached.
nf40_11:mov ax, bx
|
|
shl eax, 16
|
|
nf40_12:mov ax, bx
|
|
mov [edi], eax
|
|
nf40_13:mov ax, bx
|
|
shl eax, 16
|
|
nf40_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf40_31:mov ax, bx
|
|
shl eax, 16
|
|
nf40_32:mov ax, bx
|
|
mov [edi], eax
|
|
nf40_33:mov ax, bx
|
|
shl eax, 16
|
|
nf40_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
mov eax, [esp]  ; eax = source pointer saved by "push esi"
|
|
mov cx, [eax+6]  ; second pair of source bytes, [esi+6] and [esi+7]
|
|
mov bl,cl
|
|
mov bh,cl
|
|
mov dl,ch
|
|
mov dh,cl
|
|
mov al,ch
|
|
mov ah,ch
|
|
mov ebp,eax
|
|
|
|
nf40_51:mov ax, bx
|
|
shl eax, 16
|
|
nf40_52:mov ax, bx
|
|
mov [edi], eax
|
|
nf40_53:mov ax, bx
|
|
shl eax, 16
|
|
nf40_54:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf40_71:mov ax, bx
|
|
shl eax, 16
|
|
nf40_72:mov ax, bx
|
|
mov [edi], eax
|
|
nf40_73:mov ax, bx
|
|
shl eax, 16
|
|
nf40_74:mov ax, bx
|
|
mov [edi+4], eax
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 12  ; consume the 12 source bytes
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf9 -- decoder for a block stored in the "8x8x2" form (20 bytes), and
; dispatcher for the related shorter forms.
;   In:   esi -> encoded block, edi -> destination.
;   Out:  esi advanced by 20; edi = entry edi + 3*nf_width - nfpk_back_right.
;   Uses: eax, ebx, ecx, edx.
; Dispatch on the four bytes at [esi]:
;   byte0 > byte1 (unsigned)          -> nf41 (entered with eax = dword [esi])
;   else byte2 > byte3 (unsigned)     -> nf25
;   else                              -> handled here.
; Here the bitmap bytes [esi+4..19] are merged in pairs two bytes apart
; (e.g. [esi+4] with [esi+6]) into indices into nfpk_mov4; the four bytes
; of each table entry are stored over the second byte of four consecutive
; "mov al/ah, bl" instructions.  The patched code then writes four rows of
; eight bytes.
; NOTE(review): the patched byte is presumably the ModRM byte, selecting
; one of bl/bh/cl/ch as the source -- confirm against nfpk_mov4.
nf9: ; 8x8x2 (20 bytes)
|
|
|
|
mov eax, [esi]
|
|
cmp al, ah
|
|
ja nf41  ; nf41 expects eax to still hold dword [esi]
|
|
|
|
shr eax, 16  ; al = byte [esi+2], ah = byte [esi+3]
|
|
cmp al, ah
|
|
ja nf25
|
|
|
|
xor eax, eax  ; upper 24 bits stay zero so eax can be used as a table index
|
|
lea ecx, nfpk_mov4
|
|
lea edx, byte ptr ds:nf9_11+1  ; edx -> second byte of the instruction at nf9_11
|
|
|
|
mov al, [esi+4]
|
|
mov bl, al
|
|
xor bl, [esi+6]
|
|
and bl, 0ccH  ; keep the differences in bits 2,3,6,7 only
|
|
xor al, bl  ; al = bits 0,1,4,5 of [esi+4] and bits 2,3,6,7 of [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_11-nf9_11)], bl
|
|
mov [edx+(nf9_12-nf9_11)], bh
|
|
shr ebx, 16  ; bring the upper two table bytes into bl/bh
|
|
mov [edx+(nf9_13-nf9_11)], bl
|
|
mov [edx+(nf9_14-nf9_11)], bh
|
|
|
|
mov al, [esi+5]  ; same merge for [esi+5]/[esi+7]
|
|
mov bl, al
|
|
xor bl, [esi+7]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_15-nf9_11)], bl
|
|
mov [edx+(nf9_16-nf9_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_17-nf9_11)], bl
|
|
mov [edx+(nf9_18-nf9_11)], bh
|
|
|
|
|
|
mov al, [esi+8]
|
|
mov bl, al
|
|
xor bl, [esi+10]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_31-nf9_11)], bl
|
|
mov [edx+(nf9_32-nf9_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_33-nf9_11)], bl
|
|
mov [edx+(nf9_34-nf9_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov bl, al
|
|
xor bl, [esi+11]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_35-nf9_11)], bl
|
|
mov [edx+(nf9_36-nf9_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_37-nf9_11)], bl
|
|
mov [edx+(nf9_38-nf9_11)], bh
|
|
|
|
lea edx, [edx+(nf9_51-nf9_11)]  ; rebase edx onto the instruction at nf9_51
|
|
|
|
mov al, [esi+12]
|
|
mov bl, al
|
|
xor bl, [esi+14]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_51-nf9_51)], bl
|
|
mov [edx+(nf9_52-nf9_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_53-nf9_51)], bl
|
|
mov [edx+(nf9_54-nf9_51)], bh
|
|
|
|
mov al, [esi+13]
|
|
mov bl, al
|
|
xor bl, [esi+15]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_55-nf9_51)], bl
|
|
mov [edx+(nf9_56-nf9_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_57-nf9_51)], bl
|
|
mov [edx+(nf9_58-nf9_51)], bh
|
|
|
|
|
|
mov al, [esi+16]
|
|
mov bl, al
|
|
xor bl, [esi+18]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_71-nf9_51)], bl
|
|
mov [edx+(nf9_72-nf9_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_73-nf9_51)], bl
|
|
mov [edx+(nf9_74-nf9_51)], bh
|
|
|
|
mov al, [esi+17]
|
|
mov bl, al
|
|
xor bl, [esi+19]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_75-nf9_51)], bl
|
|
mov [edx+(nf9_76-nf9_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf9_77-nf9_51)], bl
|
|
mov [edx+(nf9_78-nf9_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width  ; edx is the row stride for the draw section
|
|
jmp nf9_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf9_0:
|
|
; The "bl" source of every labelled mov below is a placeholder that the
; patch code above overwrites before this point is reached.
nf9_11: mov al, bl
|
|
nf9_12: mov ah, bl
|
|
shl eax, 16
|
|
nf9_13: mov al, bl
|
|
nf9_14: mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf9_15: mov al, bl
|
|
nf9_16: mov ah, bl
|
|
shl eax, 16
|
|
nf9_17: mov al, bl
|
|
nf9_18: mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf9_31: mov al, bl
|
|
nf9_32: mov ah, bl
|
|
shl eax, 16
|
|
nf9_33: mov al, bl
|
|
nf9_34: mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf9_35: mov al, bl
|
|
nf9_36: mov ah, bl
|
|
shl eax, 16
|
|
nf9_37: mov al, bl
|
|
nf9_38: mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf9_51: mov al, bl
|
|
nf9_52: mov ah, bl
|
|
shl eax, 16
|
|
nf9_53: mov al, bl
|
|
nf9_54: mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf9_55: mov al, bl
|
|
nf9_56: mov ah, bl
|
|
shl eax, 16
|
|
nf9_57: mov al, bl
|
|
nf9_58: mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf9_71: mov al, bl
|
|
nf9_72: mov ah, bl
|
|
shl eax, 16
|
|
nf9_73: mov al, bl
|
|
nf9_74: mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf9_75: mov al, bl
|
|
nf9_76: mov ah, bl
|
|
shl eax, 16
|
|
nf9_77: mov al, bl
|
|
nf9_78: mov ah, bl
|
|
mov [edi+4], eax
|
|
|
|
add esi, 20  ; consume the 20 source bytes
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+16
; nf25 -- decoder for a block stored in the "low 4x4x2" form (8 bytes).
; Reached from nf9 when byte [esi] <= byte [esi+1] and byte [esi+2] is
; above byte [esi+3].
;   In:   esi -> encoded block, edi -> destination.
;   Out:  esi advanced by 8; edi = entry edi + 3*nf_width - nfpk_back_right.
;   Uses: eax, ebx, ecx, edx.
; Each bitmap byte [esi+4..7] indexes nfpk_mov4 directly; the four bytes of
; the table entry are stored over the second byte of the labelled movs of
; one output row, in the order _x4, _x3, _x2, _x1.  In the draw section
; every patched mov is followed by a copy into the other half of ax, so
; each selected value is written to two adjacent bytes.
; NOTE(review): the patched byte is presumably the ModRM byte, selecting
; one of bl/bh/cl/ch as the source -- confirm against nfpk_mov4.
|
|
nf25: ; low 4x4x2 (8 bytes)
|
|
|
|
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 8
|
|
jmp nf_solid
|
|
endif
|
|
|
|
xor eax, eax  ; upper 24 bits stay zero so eax can be used as a table index
|
|
lea ecx, nfpk_mov4
|
|
lea edx, byte ptr ds:nf25_11+1  ; edx -> second byte of the instruction at nf25_11
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_14-nf25_11)], bl
|
|
mov [edx+(nf25_13-nf25_11)], bh
|
|
shr ebx, 16  ; bring the upper two table bytes into bl/bh
|
|
mov [edx+(nf25_12-nf25_11)], bl
|
|
mov [edx+(nf25_11-nf25_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_24-nf25_11)], bl
|
|
mov [edx+(nf25_23-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_22-nf25_11)], bl
|
|
mov [edx+(nf25_21-nf25_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_34-nf25_11)], bl
|
|
mov [edx+(nf25_33-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_32-nf25_11)], bl
|
|
mov [edx+(nf25_31-nf25_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_44-nf25_11)], bl
|
|
mov [edx+(nf25_43-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_42-nf25_11)], bl
|
|
mov [edx+(nf25_41-nf25_11)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width  ; edx is the row stride for the draw section
|
|
jmp nf25_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf25_0:
|
|
; The "bl" source of every labelled mov below is a placeholder that the
; patch code above overwrites before this point is reached.
nf25_11:mov ah, bl
|
|
mov al, ah  ; duplicate the selected byte into both halves of ax
|
|
shl eax, 16
|
|
nf25_12:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf25_13:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_14:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf25_21:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_22:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf25_23:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_24:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf25_31:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_32:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf25_33:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_34:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf25_41:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_42:mov al, bl
|
|
mov ah, al
|
|
mov [edi], eax
|
|
nf25_43:mov ah, bl
|
|
mov al, ah
|
|
shl eax, 16
|
|
nf25_44:mov al, bl
|
|
mov ah, al
|
|
mov [edi+4], eax
|
|
|
|
add esi, 8  ; consume the 8 source bytes
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+32
; nf41 -- decoder for a block stored in the "low 4x8x2" form (12 bytes),
; and dispatcher to nf57.
; Reached from nf9 when byte [esi] is above byte [esi+1].
;   In:   esi -> encoded block, edi -> destination,
;         eax = dword [esi] (left there by nf9; it is not reloaded here).
;   Out:  esi advanced by 12; edi = entry edi + 3*nf_width - nfpk_back_right.
;   Uses: eax, ebx, ecx, edx.
; If byte [esi+2] is above byte [esi+3] the block is handed to nf57.
; Otherwise each bitmap byte [esi+4..11] indexes nfpk_mov4 and the four
; bytes of the table entry are stored over the second byte of the labelled
; movs, in the order _x4, _x3, _x2, _x1.  Bitmap bytes are consumed two per
; output row: in every 16-bit half of a stored dword, al comes from the
; first byte's patch (labels _1x/_3x/_5x/_7x) and ah from the second
; byte's patch (labels _2x/_4x/_6x/_8x).
; NOTE(review): the patched byte is presumably the ModRM byte, selecting
; one of bl/bh/cl/ch as the source -- confirm against nfpk_mov4.
|
|
nf41: ; low 4x8x2 (12 bytes)
|
|
shr eax, 16  ; al = byte [esi+2], ah = byte [esi+3]
|
|
cmp al, ah
|
|
ja nf57
|
|
|
|
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 12
|
|
jmp nf_solid
|
|
endif
|
|
|
|
xor eax, eax  ; upper 24 bits stay zero so eax can be used as a table index
|
|
lea ecx, nfpk_mov4
|
|
lea edx, byte ptr ds:nf41_11+1  ; edx -> second byte of the instruction at nf41_11
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_14-nf41_11)], bl
|
|
mov [edx+(nf41_13-nf41_11)], bh
|
|
shr ebx, 16  ; bring the upper two table bytes into bl/bh
|
|
mov [edx+(nf41_12-nf41_11)], bl
|
|
mov [edx+(nf41_11-nf41_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_24-nf41_11)], bl
|
|
mov [edx+(nf41_23-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_22-nf41_11)], bl
|
|
mov [edx+(nf41_21-nf41_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_34-nf41_11)], bl
|
|
mov [edx+(nf41_33-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_32-nf41_11)], bl
|
|
mov [edx+(nf41_31-nf41_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_44-nf41_11)], bl
|
|
mov [edx+(nf41_43-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_42-nf41_11)], bl
|
|
mov [edx+(nf41_41-nf41_11)], bh
|
|
|
|
lea edx, [edx+(nf41_51-nf41_11)]  ; rebase edx onto the instruction at nf41_51
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_54-nf41_51)], bl
|
|
mov [edx+(nf41_53-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_52-nf41_51)], bl
|
|
mov [edx+(nf41_51-nf41_51)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_64-nf41_51)], bl
|
|
mov [edx+(nf41_63-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_62-nf41_51)], bl
|
|
mov [edx+(nf41_61-nf41_51)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_74-nf41_51)], bl
|
|
mov [edx+(nf41_73-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_72-nf41_51)], bl
|
|
mov [edx+(nf41_71-nf41_51)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_84-nf41_51)], bl
|
|
mov [edx+(nf41_83-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_82-nf41_51)], bl
|
|
mov [edx+(nf41_81-nf41_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width  ; edx is the row stride for the draw section
|
|
jmp nf41_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf41_0:
|
|
; The "bl" source of every labelled mov below is a placeholder that the
; patch code above overwrites before this point is reached.
nf41_11:mov ah, bl
|
|
mov al, ah  ; al = value chosen by the first bitmap byte of the row
|
|
nf41_21:mov ah, bl  ; ah = value chosen by the second bitmap byte of the row
|
|
shl eax, 16
|
|
nf41_22:mov al, bl
|
|
mov ah, al
|
|
nf41_12:mov al, bl
|
|
mov [edi], eax
|
|
nf41_13:mov ah, bl
|
|
mov al, ah
|
|
nf41_23:mov ah, bl
|
|
shl eax, 16
|
|
nf41_24:mov al, bl
|
|
mov ah, al
|
|
nf41_14:mov al, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_31:mov ah, bl
|
|
mov al, ah
|
|
nf41_41:mov ah, bl
|
|
shl eax, 16
|
|
nf41_42:mov al, bl
|
|
mov ah, al
|
|
nf41_32:mov al, bl
|
|
mov [edi], eax
|
|
nf41_33:mov ah, bl
|
|
mov al, ah
|
|
nf41_43:mov ah, bl
|
|
shl eax, 16
|
|
nf41_44:mov al, bl
|
|
mov ah, al
|
|
nf41_34:mov al, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_51:mov ah, bl
|
|
mov al, ah
|
|
nf41_61:mov ah, bl
|
|
shl eax, 16
|
|
nf41_62:mov al, bl
|
|
mov ah, al
|
|
nf41_52:mov al, bl
|
|
mov [edi], eax
|
|
nf41_53:mov ah, bl
|
|
mov al, ah
|
|
nf41_63:mov ah, bl
|
|
shl eax, 16
|
|
nf41_64:mov al, bl
|
|
mov ah, al
|
|
nf41_54:mov al, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf41_71:mov ah, bl
|
|
mov al, ah
|
|
nf41_81:mov ah, bl
|
|
shl eax, 16
|
|
nf41_82:mov al, bl
|
|
mov ah, al
|
|
nf41_72:mov al, bl
|
|
mov [edi], eax
|
|
nf41_73:mov ah, bl
|
|
mov al, ah
|
|
nf41_83:mov ah, bl
|
|
shl eax, 16
|
|
nf41_84:mov al, bl
|
|
mov ah, al
|
|
nf41_74:mov al, bl
|
|
mov [edi+4], eax
|
|
|
|
add esi, 12  ; consume the 12 source bytes
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+48
; nf57 -- decoder for a block stored in the "low 8x4x2" form (12 bytes).
; Reached from nf41 when byte [esi+2] is above byte [esi+3].
;   In:   esi -> encoded block, edi -> destination.
;   Out:  esi advanced by 12; edi = entry edi + 3*nf_width - nfpk_back_right.
;   Uses: eax, ebx, ecx, edx.
; Each bitmap byte [esi+4..11] indexes nfpk_mov4; the four bytes of the
; table entry are stored over the second byte of four consecutive
; "mov al/ah, bl" instructions.  Two bitmap bytes are used per output row
; (one per stored dword), four rows in total.
; NOTE(review): the patched byte is presumably the ModRM byte, selecting
; one of bl/bh/cl/ch as the source -- confirm against nfpk_mov4.
|
|
nf57: ; low 8x4x2 (12 bytes)
|
|
xor eax, eax  ; upper 24 bits stay zero so eax can be used as a table index
|
|
lea ecx, nfpk_mov4
|
|
lea edx, byte ptr ds:nf57_11+1  ; edx -> second byte of the instruction at nf57_11
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_11-nf57_11)], bl
|
|
mov [edx+(nf57_12-nf57_11)], bh
|
|
shr ebx, 16  ; bring the upper two table bytes into bl/bh
|
|
mov [edx+(nf57_13-nf57_11)], bl
|
|
mov [edx+(nf57_14-nf57_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_15-nf57_11)], bl
|
|
mov [edx+(nf57_16-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_17-nf57_11)], bl
|
|
mov [edx+(nf57_18-nf57_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_21-nf57_11)], bl
|
|
mov [edx+(nf57_22-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_23-nf57_11)], bl
|
|
mov [edx+(nf57_24-nf57_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_25-nf57_11)], bl
|
|
mov [edx+(nf57_26-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_27-nf57_11)], bl
|
|
mov [edx+(nf57_28-nf57_11)], bh
|
|
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_31-nf57_11)], bl
|
|
mov [edx+(nf57_32-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_33-nf57_11)], bl
|
|
mov [edx+(nf57_34-nf57_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_35-nf57_11)], bl
|
|
mov [edx+(nf57_36-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_37-nf57_11)], bl
|
|
mov [edx+(nf57_38-nf57_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_41-nf57_11)], bl
|
|
mov [edx+(nf57_42-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_43-nf57_11)], bl
|
|
mov [edx+(nf57_44-nf57_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_45-nf57_11)], bl
|
|
mov [edx+(nf57_46-nf57_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf57_47-nf57_11)], bl
|
|
mov [edx+(nf57_48-nf57_11)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width  ; edx is the row stride for the draw section
|
|
jmp nf57_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf57_0:
|
|
; The "bl" source of every labelled mov below is a placeholder that the
; patch code above overwrites before this point is reached.
nf57_11:mov al, bl
|
|
nf57_12:mov ah, bl
|
|
shl eax, 16
|
|
nf57_13:mov al, bl
|
|
nf57_14:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf57_15:mov al, bl
|
|
nf57_16:mov ah, bl
|
|
shl eax, 16
|
|
nf57_17:mov al, bl
|
|
nf57_18:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf57_21:mov al, bl
|
|
nf57_22:mov ah, bl
|
|
shl eax, 16
|
|
nf57_23:mov al, bl
|
|
nf57_24:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf57_25:mov al, bl
|
|
nf57_26:mov ah, bl
|
|
shl eax, 16
|
|
nf57_27:mov al, bl
|
|
nf57_28:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf57_31:mov al, bl
|
|
nf57_32:mov ah, bl
|
|
shl eax, 16
|
|
nf57_33:mov al, bl
|
|
nf57_34:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf57_35:mov al, bl
|
|
nf57_36:mov ah, bl
|
|
shl eax, 16
|
|
nf57_37:mov al, bl
|
|
nf57_38:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf57_41:mov al, bl
|
|
nf57_42:mov ah, bl
|
|
shl eax, 16
|
|
nf57_43:mov al, bl
|
|
nf57_44:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf57_45:mov al, bl
|
|
nf57_46:mov ah, bl
|
|
shl eax, 16
|
|
nf57_47:mov al, bl
|
|
nf57_48:mov ah, bl
|
|
mov [edi+4], eax
|
|
|
|
add esi, 12  ; consume the 12 source bytes
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf10 -- decoder for a block stored in the "2x2 4x4x2" form (32 bytes),
; and dispatcher for the related 24-byte forms.
;   In:   esi -> encoded block, edi -> destination.
;   Out:  esi advanced by 32; edi = entry edi + 3*nf_width - nfpk_back_right.
;   Uses: eax, ebx, ecx, edx.
; If byte [esi] is above byte [esi+1] (unsigned) the block is handed to
; nf26.  Otherwise the data is read as four 8-byte groups (four colour
; bytes followed by four bitmap bytes) at [esi], [esi+8], [esi+16] and
; [esi+24].  The bitmap bytes of a group are merged in adjacent pairs into
; indices into nfpk_mov4, and the four bytes of each table entry are
; stored over the second byte of four "mov al/ah, bl" instructions.  The
; draw section writes one dword per row: rows 1-2 and 3-4 of the left
; column from groups 1 and 2, then, after moving back up four rows and
; right four bytes, rows 1-2 and 3-4 of the right column from groups 3
; and 4.
; NOTE(review): the patched byte is presumably the ModRM byte, selecting
; one of bl/bh/cl/ch as the source -- confirm against nfpk_mov4.
nf10: ; 2x2 4x4x2 (32 bytes)
|
|
|
|
mov ax, [esi]
|
|
cmp al, ah
|
|
ja nf26
|
|
|
|
xor eax, eax  ; upper 24 bits stay zero so eax can be used as a table index
|
|
lea ecx, nfpk_mov4
|
|
lea edx, byte ptr ds:nf10_11+1  ; edx -> second byte of the instruction at nf10_11
|
|
|
|
mov al, [esi+4]
|
|
mov bl, al
|
|
xor bl, [esi+5]
|
|
and bl, 0ccH  ; keep the differences in bits 2,3,6,7 only
|
|
xor al, bl  ; al = bits 0,1,4,5 of [esi+4] and bits 2,3,6,7 of [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_11-nf10_11)], bl
|
|
mov [edx+(nf10_12-nf10_11)], bh
|
|
shr ebx, 16  ; bring the upper two table bytes into bl/bh
|
|
mov [edx+(nf10_13-nf10_11)], bl
|
|
mov [edx+(nf10_14-nf10_11)], bh
|
|
|
|
mov al, [esi+6]
|
|
mov bl, al
|
|
xor bl, [esi+7]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_21-nf10_11)], bl
|
|
mov [edx+(nf10_22-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_23-nf10_11)], bl
|
|
mov [edx+(nf10_24-nf10_11)], bh
|
|
|
|
mov al, [esi+12]  ; bitmap bytes of the second group
|
|
mov bl, al
|
|
xor bl, [esi+13]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_31-nf10_11)], bl
|
|
mov [edx+(nf10_32-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_33-nf10_11)], bl
|
|
mov [edx+(nf10_34-nf10_11)], bh
|
|
|
|
mov al, [esi+14]
|
|
mov bl, al
|
|
xor bl, [esi+15]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_41-nf10_11)], bl
|
|
mov [edx+(nf10_42-nf10_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_43-nf10_11)], bl
|
|
mov [edx+(nf10_44-nf10_11)], bh
|
|
|
|
lea edx, [edx+(nf10_51-nf10_11)]  ; rebase edx onto the instruction at nf10_51
|
|
|
|
mov al, [esi+20]  ; bitmap bytes of the third group
|
|
mov bl, al
|
|
xor bl, [esi+21]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_51-nf10_51)], bl
|
|
mov [edx+(nf10_52-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_53-nf10_51)], bl
|
|
mov [edx+(nf10_54-nf10_51)], bh
|
|
|
|
mov al, [esi+22]
|
|
mov bl, al
|
|
xor bl, [esi+23]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_61-nf10_51)], bl
|
|
mov [edx+(nf10_62-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_63-nf10_51)], bl
|
|
mov [edx+(nf10_64-nf10_51)], bh
|
|
|
|
mov al, [esi+28]  ; bitmap bytes of the fourth group
|
|
mov bl, al
|
|
xor bl, [esi+29]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_71-nf10_51)], bl
|
|
mov [edx+(nf10_72-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_73-nf10_51)], bl
|
|
mov [edx+(nf10_74-nf10_51)], bh
|
|
|
|
mov al, [esi+30]
|
|
mov bl, al
|
|
xor bl, [esi+31]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_81-nf10_51)], bl
|
|
mov [edx+(nf10_82-nf10_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf10_83-nf10_51)], bl
|
|
mov [edx+(nf10_84-nf10_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width  ; edx is the row stride for the draw section
|
|
jmp nf10_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf10_0:
|
|
; The "bl" source of every labelled mov below is a placeholder that the
; patch code above overwrites before this point is reached.
nf10_11:mov al, bl
|
|
nf10_12:mov ah, bl
|
|
shl eax, 16
|
|
nf10_13:mov al, bl
|
|
nf10_14:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_21:mov al, bl
|
|
nf10_22:mov ah, bl
|
|
shl eax, 16
|
|
nf10_23:mov al, bl
|
|
nf10_24:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+8]
|
|
mov cx, [esi+10]
|
|
|
|
nf10_31:mov al, bl
|
|
nf10_32:mov ah, bl
|
|
shl eax, 16
|
|
nf10_33:mov al, bl
|
|
nf10_34:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_41:mov al, bl
|
|
nf10_42:mov ah, bl
|
|
shl eax, 16
|
|
nf10_43:mov al, bl
|
|
nf10_44:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
lea eax, [edx*4-4]  ; four rows (edx = nf_width) minus four bytes
|
|
sub edi, eax  ; back up four rows and step four bytes right
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+16]
|
|
mov cx, [esi+18]
|
|
|
|
nf10_51:mov al, bl
|
|
nf10_52:mov ah, bl
|
|
shl eax, 16
|
|
nf10_53:mov al, bl
|
|
nf10_54:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_61:mov al, bl
|
|
nf10_62:mov ah, bl
|
|
shl eax, 16
|
|
nf10_63:mov al, bl
|
|
nf10_64:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+24]
|
|
mov cx, [esi+26]
|
|
|
|
nf10_71:mov al, bl
|
|
nf10_72:mov ah, bl
|
|
shl eax, 16
|
|
nf10_73:mov al, bl
|
|
nf10_74:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf10_81:mov al, bl
|
|
nf10_82:mov ah, bl
|
|
shl eax, 16
|
|
nf10_83:mov al, bl
|
|
nf10_84:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
add esi, 32  ; consume the 32 source bytes
|
|
sub edi, 4  ; undo the four-byte step to the right
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+16
; nf26 -- decoder for a block stored in the "2x1 4x8x2" form (24 bytes),
; and dispatcher to nf42.
; Reached from nf10 when byte [esi] is above byte [esi+1].
;   In:   esi -> encoded block, edi -> destination.
;   Out:  esi advanced by 24; edi = entry edi + 3*nf_width - nfpk_back_right.
;   Uses: eax, ebx, ecx, edx.
; If byte [esi+12] is above byte [esi+13] (unsigned) the block is handed
; to nf42.  Otherwise the data is read as two 12-byte groups (four colour
; bytes followed by eight bitmap bytes) at [esi] and [esi+12].  Bitmap
; bytes are merged in adjacent pairs into indices into nfpk_mov4 and the
; four bytes of each table entry are stored over the second byte of four
; "mov al/ah, bl" instructions.  The draw section writes four dwords down
; the left column from group 1, moves back up four rows and right four
; bytes, and writes four dwords down the right column from group 2.
; NOTE(review): the patched byte is presumably the ModRM byte, selecting
; one of bl/bh/cl/ch as the source -- confirm against nfpk_mov4.
|
|
nf26: ; 2x1 4x8x2 (24 bytes)
|
|
|
|
mov ax, [esi+12]
|
|
cmp al, ah
|
|
ja nf42
|
|
|
|
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 24
|
|
jmp nf_solid
|
|
endif
|
|
|
|
xor eax, eax  ; upper 24 bits stay zero so eax can be used as a table index
|
|
lea ecx, nfpk_mov4
|
|
lea edx, byte ptr ds:nf26_11+1  ; edx -> second byte of the instruction at nf26_11
|
|
|
|
mov al, [esi+4]
|
|
mov bl, al
|
|
xor bl, [esi+5]
|
|
and bl, 0ccH  ; keep the differences in bits 2,3,6,7 only
|
|
xor al, bl  ; al = bits 0,1,4,5 of [esi+4] and bits 2,3,6,7 of [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_11-nf26_11)], bl
|
|
mov [edx+(nf26_12-nf26_11)], bh
|
|
shr ebx, 16  ; bring the upper two table bytes into bl/bh
|
|
mov [edx+(nf26_13-nf26_11)], bl
|
|
mov [edx+(nf26_14-nf26_11)], bh
|
|
|
|
mov al, [esi+6]
|
|
mov bl, al
|
|
xor bl, [esi+7]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_21-nf26_11)], bl
|
|
mov [edx+(nf26_22-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_23-nf26_11)], bl
|
|
mov [edx+(nf26_24-nf26_11)], bh
|
|
|
|
mov al, [esi+8]
|
|
mov bl, al
|
|
xor bl, [esi+9]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_31-nf26_11)], bl
|
|
mov [edx+(nf26_32-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_33-nf26_11)], bl
|
|
mov [edx+(nf26_34-nf26_11)], bh
|
|
|
|
mov al, [esi+10]
|
|
mov bl, al
|
|
xor bl, [esi+11]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_41-nf26_11)], bl
|
|
mov [edx+(nf26_42-nf26_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_43-nf26_11)], bl
|
|
mov [edx+(nf26_44-nf26_11)], bh
|
|
|
|
lea edx, [edx+(nf26_51-nf26_11)]  ; rebase edx onto the instruction at nf26_51
|
|
|
|
mov al, [esi+16]  ; bitmap bytes of the second group
|
|
mov bl, al
|
|
xor bl, [esi+17]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_51-nf26_51)], bl
|
|
mov [edx+(nf26_52-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_53-nf26_51)], bl
|
|
mov [edx+(nf26_54-nf26_51)], bh
|
|
|
|
mov al, [esi+18]
|
|
mov bl, al
|
|
xor bl, [esi+19]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_61-nf26_51)], bl
|
|
mov [edx+(nf26_62-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_63-nf26_51)], bl
|
|
mov [edx+(nf26_64-nf26_51)], bh
|
|
|
|
mov al, [esi+20]
|
|
mov bl, al
|
|
xor bl, [esi+21]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_71-nf26_51)], bl
|
|
mov [edx+(nf26_72-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_73-nf26_51)], bl
|
|
mov [edx+(nf26_74-nf26_51)], bh
|
|
|
|
mov al, [esi+22]
|
|
mov bl, al
|
|
xor bl, [esi+23]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_81-nf26_51)], bl
|
|
mov [edx+(nf26_82-nf26_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf26_83-nf26_51)], bl
|
|
mov [edx+(nf26_84-nf26_51)], bh
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width  ; edx is the row stride for the draw section
|
|
jmp nf26_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf26_0:
|
|
; The "bl" source of every labelled mov below is a placeholder that the
; patch code above overwrites before this point is reached.
nf26_11:mov al, bl
|
|
nf26_12:mov ah, bl
|
|
shl eax, 16
|
|
nf26_13:mov al, bl
|
|
nf26_14:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_21:mov al, bl
|
|
nf26_22:mov ah, bl
|
|
shl eax, 16
|
|
nf26_23:mov al, bl
|
|
nf26_24:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_31:mov al, bl
|
|
nf26_32:mov ah, bl
|
|
shl eax, 16
|
|
nf26_33:mov al, bl
|
|
nf26_34:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_41:mov al, bl
|
|
nf26_42:mov ah, bl
|
|
shl eax, 16
|
|
nf26_43:mov al, bl
|
|
nf26_44:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
lea eax, [edx*4-4]  ; four rows (edx = nf_width) minus four bytes
|
|
sub edi, eax  ; back up four rows and step four bytes right
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+12]
|
|
mov cx, [esi+14]
|
|
|
|
nf26_51:mov al, bl
|
|
nf26_52:mov ah, bl
|
|
shl eax, 16
|
|
nf26_53:mov al, bl
|
|
nf26_54:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_61:mov al, bl
|
|
nf26_62:mov ah, bl
|
|
shl eax, 16
|
|
nf26_63:mov al, bl
|
|
nf26_64:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_71:mov al, bl
|
|
nf26_72:mov ah, bl
|
|
shl eax, 16
|
|
nf26_73:mov al, bl
|
|
nf26_74:mov ah, bl
|
|
mov [edi], eax
|
|
add edi, edx
|
|
|
|
nf26_81:mov al, bl
|
|
nf26_82:mov ah, bl
|
|
shl eax, 16
|
|
nf26_83:mov al, bl
|
|
nf26_84:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
add esi, 24  ; consume the 24 source bytes
|
|
sub edi, 4  ; undo the four-byte step to the right
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+32
; nf42 -- decoder for a block stored in the "1x2 8x4x2" form (24 bytes).
; Reached from nf26 when byte [esi+12] is above byte [esi+13].
;   In:   esi -> encoded block, edi -> destination.
;   Out:  esi advanced by 24; edi = entry edi + 3*nf_width - nfpk_back_right.
;   Uses: eax, ebx, ecx, edx.
; The data is read as two 12-byte groups (four colour bytes followed by
; eight bitmap bytes) at [esi] and [esi+12].  Bitmap bytes are merged in
; pairs two bytes apart (e.g. [esi+4] with [esi+6]) into indices into
; nfpk_mov4, and the four bytes of each table entry are stored over the
; second byte of four "mov al/ah, bl" instructions.  The draw section
; writes two 8-byte rows from group 1 and two from group 2.
; NOTE(review): the patched byte is presumably the ModRM byte, selecting
; one of bl/bh/cl/ch as the source -- confirm against nfpk_mov4.
|
|
nf42: ; 1x2 8x4x2 (24 bytes)
|
|
|
|
if 0 ;debug
|
|
mov eax, 0
|
|
mov ebx, 0
|
|
add esi, 24
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax  ; upper 24 bits stay zero so eax can be used as a table index
|
|
lea ecx, nfpk_mov4
|
|
lea edx, byte ptr ds:nf42_11+1  ; edx -> second byte of the instruction at nf42_11
|
|
|
|
mov al, [esi+4]
|
|
mov bl, al
|
|
xor bl, [esi+6]
|
|
and bl, 0ccH  ; keep the differences in bits 2,3,6,7 only
|
|
xor al, bl  ; al = bits 0,1,4,5 of [esi+4] and bits 2,3,6,7 of [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_11-nf42_11)], bl
|
|
mov [edx+(nf42_12-nf42_11)], bh
|
|
shr ebx, 16  ; bring the upper two table bytes into bl/bh
|
|
mov [edx+(nf42_13-nf42_11)], bl
|
|
mov [edx+(nf42_14-nf42_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov bl, al
|
|
xor bl, [esi+7]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_15-nf42_11)], bl
|
|
mov [edx+(nf42_16-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_17-nf42_11)], bl
|
|
mov [edx+(nf42_18-nf42_11)], bh
|
|
|
|
|
|
mov al, [esi+8]
|
|
mov bl, al
|
|
xor bl, [esi+10]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_31-nf42_11)], bl
|
|
mov [edx+(nf42_32-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_33-nf42_11)], bl
|
|
mov [edx+(nf42_34-nf42_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov bl, al
|
|
xor bl, [esi+11]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_35-nf42_11)], bl
|
|
mov [edx+(nf42_36-nf42_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_37-nf42_11)], bl
|
|
mov [edx+(nf42_38-nf42_11)], bh
|
|
|
|
|
|
lea edx, [edx+(nf42_51-nf42_11)]  ; rebase edx onto the instruction at nf42_51
|
|
|
|
mov al, [esi+16]  ; bitmap bytes of the second group
|
|
mov bl, al
|
|
xor bl, [esi+18]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_51-nf42_51)], bl
|
|
mov [edx+(nf42_52-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_53-nf42_51)], bl
|
|
mov [edx+(nf42_54-nf42_51)], bh
|
|
|
|
mov al, [esi+17]
|
|
mov bl, al
|
|
xor bl, [esi+19]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_55-nf42_51)], bl
|
|
mov [edx+(nf42_56-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_57-nf42_51)], bl
|
|
mov [edx+(nf42_58-nf42_51)], bh
|
|
|
|
|
|
mov al, [esi+20]
|
|
mov bl, al
|
|
xor bl, [esi+22]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_71-nf42_51)], bl
|
|
mov [edx+(nf42_72-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_73-nf42_51)], bl
|
|
mov [edx+(nf42_74-nf42_51)], bh
|
|
|
|
mov al, [esi+21]
|
|
mov bl, al
|
|
xor bl, [esi+23]
|
|
and bl, 0ccH
|
|
xor al, bl
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_75-nf42_51)], bl
|
|
mov [edx+(nf42_76-nf42_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf42_77-nf42_51)], bl
|
|
mov [edx+(nf42_78-nf42_51)], bh
|
|
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi]
|
|
mov cx, [esi+2]
|
|
|
|
mov edx, nf_width  ; edx is the row stride for the draw section
|
|
jmp nf42_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf42_0:
|
|
; The "bl" source of every labelled mov below is a placeholder that the
; patch code above overwrites before this point is reached.
nf42_11:mov al, bl
|
|
nf42_12:mov ah, bl
|
|
shl eax, 16
|
|
nf42_13:mov al, bl
|
|
nf42_14:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_15:mov al, bl
|
|
nf42_16:mov ah, bl
|
|
shl eax, 16
|
|
nf42_17:mov al, bl
|
|
nf42_18:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_31:mov al, bl
|
|
nf42_32:mov ah, bl
|
|
shl eax, 16
|
|
nf42_33:mov al, bl
|
|
nf42_34:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_35:mov al, bl
|
|
nf42_36:mov ah, bl
|
|
shl eax, 16
|
|
nf42_37:mov al, bl
|
|
nf42_38:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
; Load bl,bh,cl,ch with four colors
|
|
mov bx, [esi+12]
|
|
mov cx, [esi+14]
|
|
|
|
nf42_51:mov al, bl
|
|
nf42_52:mov ah, bl
|
|
shl eax, 16
|
|
nf42_53:mov al, bl
|
|
nf42_54:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_55:mov al, bl
|
|
nf42_56:mov ah, bl
|
|
shl eax, 16
|
|
nf42_57:mov al, bl
|
|
nf42_58:mov ah, bl
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
|
|
nf42_71:mov al, bl
|
|
nf42_72:mov ah, bl
|
|
shl eax, 16
|
|
nf42_73:mov al, bl
|
|
nf42_74:mov ah, bl
|
|
mov [edi], eax
|
|
|
|
nf42_75:mov al, bl
|
|
nf42_76:mov ah, bl
|
|
shl eax, 16
|
|
nf42_77:mov al, bl
|
|
nf42_78:mov ah, bl
|
|
mov [edi+4], eax
|
|
|
|
add esi, 24  ; consume the 24 source bytes
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf11 -- decoder for a block stored in the "8x8x8" form (64 bytes).
;   In:   esi -> encoded block, edi -> destination.
;   Out:  esi advanced by 64; edi = entry edi + 3*nf_width - nfpk_back_right.
;   Uses: eax, ebx, ecx, edx.
; Four rows of eight bytes are written.  Output row k is built from the
; source dwords at [esi+16*k] / [esi+16*k+4] and the dwords eight bytes
; after them: under the mask 0ff00ff00H, bytes 1 and 3 of each stored
; dword come from the later dword and bytes 0 and 2 from the earlier one.
nf11: ; 8x8x8 (64 bytes)
|
|
if 0 ;debug
|
|
add esi, 64
|
|
mov eax, 0fefefefeH
|
|
; mov ebx, eax
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
mov edx, nf_width  ; edx is the destination row stride
|
|
mov ebx, 0ff00ff00H  ; selects bytes 1 and 3 of a dword
|
|
|
|
mov eax, [esi] ;0
|
|
mov ecx, eax
|
|
xor ecx, [esi+8]
|
|
and ecx, ebx  ; differences in bytes 1 and 3 only
|
|
xor eax, ecx  ; eax = bytes 0,2 of [esi] and bytes 1,3 of [esi+8]
|
|
mov [edi], eax
|
|
mov eax, [esi+4]
|
|
mov ecx, eax
|
|
xor ecx, [esi+4+8]
|
|
and ecx, ebx
|
|
xor eax, ecx
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
mov eax, [esi+16] ;2
|
|
mov ecx, eax
|
|
xor ecx, [esi+16+8]
|
|
and ecx, ebx
|
|
xor eax, ecx
|
|
mov [edi], eax
|
|
mov eax, [esi+20]
|
|
mov ecx, eax
|
|
xor ecx, [esi+20+8]
|
|
and ecx, ebx
|
|
xor eax, ecx
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
mov eax, [esi+32] ;4
|
|
mov ecx, eax
|
|
xor ecx, [esi+32+8]
|
|
and ecx, ebx
|
|
xor eax, ecx
|
|
mov [edi], eax
|
|
mov eax, [esi+36]
|
|
mov ecx, eax
|
|
xor ecx, [esi+36+8]
|
|
and ecx, ebx
|
|
xor eax, ecx
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
mov eax, [esi+48] ;6
|
|
mov ecx, eax
|
|
xor ecx, [esi+48+8]
|
|
and ecx, ebx
|
|
xor eax, ecx
|
|
mov [edi], eax
|
|
mov eax, [esi+52]
|
|
mov ecx, eax
|
|
xor ecx, [esi+52+8]
|
|
and ecx, ebx
|
|
xor eax, ecx
|
|
mov [edi+4], eax
|
|
|
|
add esi, 64  ; consume the 64 source bytes
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf12 -- opcode 12: low-resolution block of 4x4 one-byte cells (16 source
; bytes).  Every source byte is written twice side by side, so each 4-byte
; source row becomes one 8-byte output row; four output rows are stored.
; In:  esi -> 16 bytes of pixel data, edi -> destination of the block.
; Out: esi advanced by 16; edi moved down three rows and then reduced by
;      nfpk_back_right.  eax, ebx, edx are used as scratch.
nf12: ; low 4x4x8 (16 bytes)
|
|
mov edx, nf_width
|
|
|
|
mov eax, [esi] ; first source row (4 bytes)
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al ; ebx bytes, low to high: al, al, ah, ah
|
|
mov [edi], ebx
|
|
shr eax, 16 ; bring source bytes 2 and 3 into al/ah
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi+4], ebx
|
|
add edi, edx
|
|
|
|
mov eax, [esi+4] ; second source row
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi], ebx
|
|
shr eax, 16
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi+4], ebx
|
|
add edi, edx
|
|
|
|
mov eax, [esi+8] ; third source row
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi], ebx
|
|
shr eax, 16
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi+4], ebx
|
|
add edi, edx
|
|
|
|
mov eax, [esi+12] ; fourth source row
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi], ebx
|
|
shr eax, 16
|
|
mov bl, ah
|
|
mov bh, ah
|
|
shl ebx, 16
|
|
mov bl, al
|
|
mov bh, al
|
|
mov [edi+4], ebx
|
|
|
|
sub edi, nfpk_back_right
|
|
add esi, 16
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf13 -- opcode 13: four solid quadrants, one color byte each (4 source
; bytes).  Bytes 0 and 1 fill the left and right 4-byte halves of the
; first two output rows; bytes 2 and 3 fill the halves of the next two.
; In:  esi -> 4 color bytes, edi -> destination of the block.
; Out: esi advanced by 4; edi moved down three rows and then reduced by
;      nfpk_back_right.  eax, ebx, ecx, edx are used as scratch.
nf13: ; 2x2 4x4x0 (4 bytes)
|
|
mov edx, nf_width
|
|
|
|
mov cl, [esi]
|
|
mov ch, cl
|
|
mov eax, ecx
|
|
shl eax, 16
|
|
mov ax, cx ; eax = color 0 in all four bytes
|
|
|
|
mov cl, [esi+1]
|
|
mov ch, cl
|
|
mov ebx, ecx
|
|
shl ebx, 16
|
|
mov bx, cx ; ebx = color 1 in all four bytes
|
|
|
|
mov [edi], eax
|
|
mov [edi+4], ebx
|
|
mov [edi+edx], eax
|
|
mov [edi+edx+4], ebx
|
|
lea edi, [edi+edx*2]
|
|
|
|
mov cl, [esi+2]
|
|
mov ch, cl
|
|
mov eax, ecx
|
|
shl eax, 16
|
|
mov ax, cx ; eax = color 2 in all four bytes
|
|
|
|
mov cl, [esi+3]
|
|
mov ch, cl
|
|
mov ebx, ecx
|
|
shl ebx, 16
|
|
mov bx, cx ; ebx = color 3 in all four bytes
|
|
|
|
mov [edi], eax
|
|
mov [edi+4], ebx
|
|
add edi, edx
|
|
mov [edi], eax
|
|
mov [edi+4], ebx
|
|
|
|
sub edi, nfpk_back_right
|
|
add esi, 4
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf14 -- opcode 14: solid block (1 source byte).
; Replicates the color byte into all four bytes of eax and ebx, steps esi
; past it and tail-jumps to nf_solid, which stores the rows, adjusts edi
; and executes the retn on this routine's behalf.  (The retn that used to
; follow the unconditional jmp could never be reached and was removed.)
; In:  esi -> color byte, edi -> destination of the block.
; Out: esi advanced by 1; eax = ebx = replicated color.
nf14: ; 8x8x0 (1 byte)
|
|
mov bl, [esi] ; Copy color into 8 positions
|
|
inc esi
|
|
mov bh, bl
|
|
mov eax, ebx
|
|
shl eax, 16
|
|
mov ax, bx
|
|
mov ebx, eax
|
|
if 0 ;debug
|
|
mov eax, 080808080h
|
|
mov ebx, eax
|
|
endif
|
|
jmp nf_solid ; tail call: nf_solid ends with retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf15 -- opcode 15: two-color checkerboard (2 source bytes).
; Builds eax with the bytes c0,c1,c0,c1 (low to high) and ebx with the
; same pattern rotated by one byte (c1,c0,c1,c0), then falls through
; into nf_solid.
;
; nf_solid -- shared tail (also reached by jmp from nf14).
; In:  eax = dword pattern for the first and third output rows,
;      ebx = dword pattern for the second and fourth output rows,
;      edi -> destination of the block.
; Stores four rows of 8 bytes, moves edi down three rows and then
; subtracts nfpk_back_right.  edx is overwritten with nf_width.
nf15: ; mix 8x8x0 (2 bytes)
|
|
mov bx, [esi] ; Copy 2 colors into 8 positions
|
|
add esi, 2 ; in a checkerboard
|
|
mov ax, bx
|
|
shl eax, 16
|
|
mov ax, bx
|
|
mov ebx, eax
|
|
rol ebx, 8 ; swap the two colors' positions for the alternate rows
|
|
if 0 ;debug
|
|
mov eax, 080808080h
|
|
mov ebx, eax
|
|
endif
|
|
nf_solid:
|
|
mov edx, nf_width
|
|
|
|
mov [edi], eax
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
add edi, edx
|
|
mov [edi], eax
|
|
mov [edi+4], eax
|
|
add edi, edx
|
|
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
nfPkDecompD ENDP
|
|
|
|
endif
|
|
|
|
;---
|
|
|
|
.data
|
|
|
|
; Constant tables
|
|
|
|
; nfhpk_mov4l -- 16 dword entries, indexed by a 4-bit value.
; Byte k of an entry corresponds to bit k of the index (m1 is the
; innermost loop, so it varies fastest and lands in the low byte):
; 0c0h+3 when the bit is 0, 0c0h+1 when it is 1.  nf23 writes these bytes
; over byte +1 of its "mov eax, ebx" instructions, so each bit picks ebx
; or ecx as the source register.
nfhpk_mov4l LABEL DWORD
|
|
; low 4x1 in 8x1 (patch +1)
|
|
; mov eax, ebx/ecx
|
|
MOVH4L_REGS TEXTEQU <!<0c0h+3,0c0h+1!>>
|
|
%FOR m4, MOVH4L_REGS
|
|
% FOR m3, MOVH4L_REGS
|
|
% FOR m2, MOVH4L_REGS
|
|
% FOR m1, MOVH4L_REGS
|
|
BYTE m1,m2,m3,m4
|
|
ENDM
|
|
ENDM
|
|
ENDM
|
|
ENDM
|
|
|
|
; nfhpk_mov8 -- 256 dword entries, indexed by one byte of pixel bits.
; Byte k of an entry corresponds to bits 2k..2k+1 of the index (m1 is the
; innermost loop and lands in the low byte).  Each byte is 047h plus a
; register number times 8; the two-bit values 0,1,2,3 select
; ebx,edx,ecx,ebp respectively.
nfhpk_mov8 LABEL DWORD
|
|
; 8x1 (each two bits select a pair of colors in a reg)
|
|
; low 4x2 in 8x2 (each two bits select a duplicated color in reg)
|
|
; (patch +1)
|
|
; mov ds:[edi+0/4/8/12], ebx/edx/ecx/ebp
|
|
; Note: Patched code specifies mov [ebp+0]... instead
|
|
; of mov [edi+0]... to ensure that 8-bit offsets are
|
|
; used by the assembler even for offset of zero.
|
|
; (The patched byte replaces the base register as well as the source.)
|
|
MOVH8_REGS TEXTEQU <!<3*8,2*8,1*8,5*8!>>
|
|
%FOR m4, MOVH8_REGS
|
|
% FOR m3, MOVH8_REGS
|
|
% FOR m2, MOVH8_REGS
|
|
% FOR m1, MOVH8_REGS
|
|
BYTE m1+047h,m2+047h,m3+047h,m4+047h
|
|
ENDM
|
|
ENDM
|
|
ENDM
|
|
ENDM
|
|
|
|
; nfhpk_mov4 -- 256 dword entries, indexed by one byte of pixel bits.
; Byte k of an entry corresponds to bits 2k..2k+1 of the index (m1 is the
; innermost loop and lands in the low byte).  The two-bit values 0,1,2,3
; give 0c0h+3, 0c0h+2, 0c0h+1, 0c0h+5, i.e. source register
; bx/dx/cx/bp (or ebx/edx/ecx/ebp) of the patched register-to-register mov.
nfhpk_mov4 LABEL DWORD
|
|
; 4x2 (patch +2)
|
|
; mov ax, bx/dx/cx/bp
|
|
; low 4x2 in 8x2 (patch +1)
|
|
; mov eax, ebx/edx/ecx/ebp
|
|
MOVH4_REGS TEXTEQU <!<0c0h+3,0c0h+2,0c0h+1,0c0h+5!>>
|
|
%FOR m4, MOVH4_REGS
|
|
% FOR m3, MOVH4_REGS
|
|
% FOR m2, MOVH4_REGS
|
|
% FOR m1, MOVH4_REGS
|
|
BYTE m1,m2,m3,m4
|
|
ENDM
|
|
ENDM
|
|
ENDM
|
|
ENDM
|
|
|
|
.code
|
|
|
|
; Normal version (HiColor)
|
|
;
|
|
|
|
; Trans16 dst, idx [, mask]
;   dst  - 16-bit register that receives the pixel
;   idx  - address expression of the 16-bit source pixel
;   mask - optional; when non-blank, the plain-load variants clear bit 15
;          of the result (and dst, 07FFFh)
; Exactly one definition is assembled.  With TRANS16 non-zero, the two
; leading alternatives are disabled by "if 0" / "elseif 0", so the
; table-lookup version in the final "else" is the one in effect.
if TRANS16
|
|
|
|
; Disabled variant: plain load, optional clearing of bit 15.
if 0
|
|
Trans16 MACRO dst:req, idx:req, mask
|
|
mov dst, [idx]
|
|
ifnb <mask>
|
|
and dst, 07FFFh
|
|
endif
|
|
ENDM
|
|
; Disabled variant: dst = pixel + (pixel and 0FFE0h).  Overwrites ax and
; ignores mask.  Presumably a 5:5:5 to 5:6:5 conversion -- TODO confirm.
elseif 0
|
|
Trans16 MACRO dst:req, idx:req, mask
|
|
mov dst, [idx]
|
|
mov ax, dst
|
|
and ax, 0FFE0h
|
|
add dst, ax
|
|
ENDM
|
|
; Active variant: dst = nf_trans16_lo[low byte] or nf_trans16_hi[high byte],
; both word tables defined outside this file.  eax is used as the index
; register and is left holding the high source byte, so dst must not be ax
; and idx must not depend on eax.  mask is accepted but not used here;
; NOTE(review): presumably nf_trans16_hi already accounts for bit 15 --
; confirm against the code that builds the tables.
else
|
|
EXTERN nf_trans16_lo: WORD
|
|
EXTERN nf_trans16_hi: WORD
|
|
Trans16 MACRO dst:req, idx:req, mask
|
|
xor eax, eax
|
|
mov al, [idx]
|
|
mov dst, nf_trans16_lo[eax*2]
|
|
xor eax, eax
|
|
mov al, [idx+1]
|
|
or dst, nf_trans16_hi[eax*2]
|
|
ENDM
|
|
endif
|
|
|
|
else
|
|
|
|
; TRANS16 = 0: plain load, optional clearing of bit 15.
Trans16 MACRO dst:req, idx:req, mask
|
|
mov dst, [idx]
|
|
ifnb <mask>
|
|
and dst, 07FFFh
|
|
endif
|
|
ENDM
|
|
|
|
endif
|
|
|
|
; nfHPkDecomp -- HiColor packed-data decoder: entry code and opcode
; dispatcher.
;   ops  -> opcode stream.  Each byte holds two 4-bit opcodes; the low
;           nibble is executed first, then the high nibble.
;   comp -> compressed data.  Its first word is an offset, relative to
;           comp, to a second byte stream (kept in bcomp) from which
;           nf2/nf3/nf4 fetch one byte each.  The main stream, read through
;           esi, starts at comp+2.
;   w    -> halved to give the number of opcode bytes handled per row
;   h    -> number of rows; decremented until zero
;   x, y -> not referenced in the code shown here; presumably used by
;           NF_DECOMP_INIT -- TODO confirm.
; tbuf, new_row and DiffBufPtrs are read below without being written in
; this part; presumably NF_DECOMP_INIT sets them -- TODO confirm.
; Handlers are entered with esi = main data pointer and edi = destination
; pointer and finish with retn.
nfHPkDecomp PROC USES ESI EDI EBX, \
|
|
ops:PTRBYTE, comp:PTRBYTE, \
|
|
x:DWORD, y:DWORD, w:DWORD, h:DWORD
|
|
LOCAL tbuf: PTRBYTE
|
|
LOCAL new_row:DWORD
|
|
LOCAL DiffBufPtrs:DWORD
|
|
|
|
LOCAL nfpk_back_right: DWORD
|
|
LOCAL wcnt:DWORD
|
|
LOCAL bcomp:PTRBYTE
|
|
|
|
LOG_LABEL "StartPkDecomp"
|
|
|
|
.data
|
|
; Handler table indexed by opcode nibble.  The figure in parentheses is
; the number of data bytes the handler consumes.
nfhpk_OpTbl label dword
|
|
dword offset nf0 ; Prev Same (0)
|
|
dword offset nf1 ; No change (and copied to screen) (0)
|
|
dword offset nf2 ; Near shift from older part of current buf (1)
|
|
dword offset nf3 ; Near shift from newer part of current buf (1)
|
|
dword offset nf4 ; Near shift from previous buffer (1)
|
|
dword offset nf5 ; Far shift from previous buffer (2)
|
|
dword offset nf6 ; Far shift from current buffer (2)
|
|
; [Or if COMPOPS, run of no changes (0)]
|
|
dword offset nf7 ; 8x8x1 (12 bytes) or low 4x4x1 (6 bytes)
|
|
dword offset nf8 ; 2x2 4x4x1 (24 bytes) or 2x1 4x8x1 (16 bytes) or 1x2 8x4x1 (16 bytes)
|
|
dword offset nf9 ; 8x8x2 (24 bytes) or low 4x4x2 or
|
|
; low 4x8x2 or low 8x4x2 (sizes of the low forms not verified here)
|
|
dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 4x8x2 (24 bytes)
|
|
dword offset nf11 ; 8x8x8 (64 bytes)
|
|
dword offset nf12 ; low 4x4x8 (16 bytes)
|
|
dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
|
|
dword offset nf14 ; 8x8x0 (1 byte)
|
|
dword offset nf15 ; mix 8x8x0 (2 bytes)
|
|
; NOTE(review): the byte counts for nf10..nf15 equal those of the 8-bit
; handlers and were not checked against the HiColor handlers -- confirm.
.code
|
|
|
|
ifdef SYMANTEC
|
|
mov ebx, ds ; Allow DS to access code
|
|
mov ecx, 0
|
|
mov ax, 3505h
|
|
int 21h
|
|
endif
|
|
|
|
NF_DECOMP_INIT 1
|
|
|
|
mov eax, nf_back_right
|
|
sub eax, SWIDTH*2 ; HiColor variant: nf_back_right less SWIDTH*2
|
|
mov nfpk_back_right, eax
|
|
|
|
mov esi, comp
|
|
mov edi, tbuf
|
|
|
|
xor eax, eax
|
|
mov ax, [esi] ; leading word: offset of the second byte stream
|
|
add eax, esi
|
|
mov bcomp, eax
|
|
add esi, 2 ; main stream starts after that word
|
|
|
|
nf_StartRow:
|
|
mov eax, w
|
|
shr eax, 1 ; one opcode byte serves two handlers
|
|
mov wcnt,eax
|
|
ALIGN 4
|
|
nf_NextPair:
|
|
dec wcnt
|
|
js nf_NextRow
|
|
mov ebx, ops
|
|
mov al, [ebx]
|
|
inc ebx
|
|
mov ops, ebx
|
|
|
|
xor ebx, ebx
|
|
mov bl, al
|
|
shr bl, 4 ; ebx = high nibble (second opcode)
|
|
and eax, 0Fh ; eax = low nibble (first opcode)
|
|
push offset nf_NextPair ; where the second handler's retn returns
|
|
push nfhpk_OpTbl[ebx*4] ; second handler: entered by the first handler's retn
|
|
jmp nfhpk_OpTbl[eax*4] ; run the first handler
|
|
|
|
nf_NextRow:
|
|
add edi, new_row
|
|
dec h
|
|
jnz nf_StartRow
|
|
LOG_LABEL "EndPkDecomp"
|
|
|
|
ifdef SYMANTEC
|
|
mov ebx, ds ; Disable DS from accessing code
|
|
mov ecx, offset DGROUP:_data_bottom[-1]
|
|
mov ax, 3505h
|
|
int 21h
|
|
endif
|
|
ret
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf0 -- opcode 0: copy the block found at offset DiffBufPtrs from edi
; (nf_shift copies from edi+eax).  Consumes no data bytes.
nf0: ; No change from previous buffer
|
|
mov eax, DiffBufPtrs
|
|
jmp nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf1 -- opcode 1: leave the block untouched; only step edi right by
; SWIDTH*2 bytes.  Consumes no data bytes.
nf1: ; No change (and copied to screen)
|
|
if 0 ;debug
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
add edi, SWIDTH*2
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf2 -- opcode 2: fetch one byte from the bcomp stream, look up its
; packed word in nfpk_ShiftP2 (al and ah are used as two separate
; components below), then fall into nf_xyc_shift.
;
; nf_xyc_shift -- shared by nf2, nf3 and nf6.
; In:  al = signed horizontal component, ah = index into nfpk_ShiftY.
; Out: eax = 2*sign_extend(al) + nfpk_ShiftY[ah], passed to nf_shift as
;      the source offset relative to edi.  ebx is overwritten.
nf2: ; Near shift from older part of current buffer
|
|
xor eax, eax
|
|
mov ebx, bcomp
|
|
inc bcomp
|
|
mov al, [ebx]
|
|
mov ax, nfpk_ShiftP2[eax*2]
|
|
nf_xyc_shift:
|
|
xor ebx, ebx
|
|
mov bl, ah ; ebx = ah, zero-extended
|
|
shl eax, 24
|
|
sar eax, 24-1 ; eax = sign-extended al, times 2
|
|
add eax, nfpk_ShiftY[ebx*4]
|
|
jmp nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf3 -- opcode 3: same lookup as nf2 (one bcomp byte through
; nfpk_ShiftP2), but both components are negated before continuing at
; nf_xyc_shift.
nf3: ; Near shift from newer part of current buffer
|
|
xor eax, eax
|
|
mov ebx, bcomp
|
|
inc bcomp
|
|
mov al, [ebx]
|
|
mov ax, nfpk_ShiftP2[eax*2]
|
|
neg al
|
|
neg ah
|
|
jmp nf_xyc_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf4 -- opcode 4: fetch one byte from the bcomp stream, look up its
; packed word in nfpk_ShiftP1 and continue at nf_xyp_shift, which also
; adds DiffBufPtrs to the offset.
nf4: ; Near shift from previous buffer
|
|
xor eax, eax
|
|
mov ebx, bcomp
|
|
inc bcomp
|
|
mov al, [ebx]
|
|
mov ax, nfpk_ShiftP1[eax*2]
|
|
jmp nf_xyp_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf5 -- opcode 5: read the two components directly from the main stream
; (2 bytes at esi: low byte into al, high byte into ah) and fall into
; nf_xyp_shift.
;
; nf_xyp_shift -- shared by nf4 and nf5.
; In:  al = signed horizontal component, ah = index into nfpk_ShiftY.
; Out: eax = 2*sign_extend(al) + nfpk_ShiftY[ah] + DiffBufPtrs, passed to
;      nf_shift as the source offset relative to edi.  ebx is overwritten.
nf5: ; Far shift from previous buffer
|
|
mov ax, [esi]
|
|
add esi, 2
|
|
nf_xyp_shift:
|
|
xor ebx, ebx
|
|
mov bl, ah ; ebx = ah, zero-extended
|
|
shl eax, 24
|
|
sar eax, 24-1 ; eax = sign-extended al, times 2
|
|
add eax, nfpk_ShiftY[ebx*4]
|
|
add eax, DiffBufPtrs
|
|
jmp nf_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
|
|
; nf6 -- opcode 6: read the two components directly from the main stream
; (2 bytes at esi) and continue at nf_xyc_shift (no DiffBufPtrs added).
nf6: ; Far shift from current buffer
|
|
mov ax, [esi]
|
|
add esi, 2
|
|
jmp nf_xyc_shift
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf_shift -- copy one block of 8 rows x 16 bytes.
; In:  edi -> destination block, eax = byte offset of the source block
;      relative to edi.
; Out: edi moved down seven rows and then reduced by nfpk_back_right;
;      esi is preserved (parked in ebx during the copy).
;      eax, ebx, edx are overwritten.
nf_shift:
|
|
if 0 ;debug
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
mov ebx, esi ; save esi
|
|
lea esi, [edi+eax]
|
|
mov edx, nf_width
|
|
|
|
; first seven rows, advancing both pointers by one row after each
REPEAT 7
|
|
mov eax, [esi]
|
|
mov [edi], eax
|
|
mov eax, [esi+4]
|
|
mov [edi+4], eax
|
|
mov eax, [esi+8]
|
|
mov [edi+8], eax
|
|
mov eax, [esi+12]
|
|
mov [edi+12], eax
|
|
add esi, edx
|
|
add edi, edx
|
|
ENDM
|
|
; eighth row: no pointer advance afterwards
mov eax, [esi]
|
|
mov [edi], eax
|
|
mov eax, [esi+4]
|
|
mov [edi+4], eax
|
|
mov eax, [esi+8]
|
|
mov [edi+8], eax
|
|
mov eax, [esi+12]
|
|
mov [edi+12], eax
|
|
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
mov esi, ebx ; restore esi
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf7 -- opcode 7: 8x8 block, 1 bit per pixel, two colors (12 source
; bytes: two 16-bit colors at [esi] and [esi+2], then one byte of pixel
; bits per row at [esi+4..11]).  If bit 15 of the first color word is
; set, the data is handled by nf23 instead.
; Self-modifying: for each bits byte the nfhpk_mov8 entry supplies four
; bytes that overwrite byte +1 of the four store instructions of that
; row, choosing which of ebx/edx/ecx/ebp each store writes.
; Out: esi advanced by 12; edi moved down seven rows and then reduced by
;      nfpk_back_right.  ebp and esi are saved on the stack meanwhile.
nf7: ; 8x8x1 (12 bytes)
|
|
|
|
test word ptr [esi], 08000h
|
|
jnz nf23
|
|
|
|
if 0 ;debug
|
|
add esi, 12
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfhpk_mov8
|
|
lea edx, byte ptr ds:nf7_11+1 ; edx -> byte to patch in the first store
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_11-nf7_11)], bl
|
|
mov [edx+(nf7_12-nf7_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_13-nf7_11)], bl
|
|
mov [edx+(nf7_14-nf7_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_21-nf7_11)], bl
|
|
mov [edx+(nf7_22-nf7_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_23-nf7_11)], bl
|
|
mov [edx+(nf7_24-nf7_11)], bh
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_31-nf7_11)], bl
|
|
mov [edx+(nf7_32-nf7_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_33-nf7_11)], bl
|
|
mov [edx+(nf7_34-nf7_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_41-nf7_11)], bl
|
|
mov [edx+(nf7_42-nf7_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_43-nf7_11)], bl
|
|
mov [edx+(nf7_44-nf7_11)], bh
|
|
|
|
lea edx, [edx+(nf7_51-nf7_11)] ; rebase the patch pointer on nf7_51
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_51-nf7_51)], bl
|
|
mov [edx+(nf7_52-nf7_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_53-nf7_51)], bl
|
|
mov [edx+(nf7_54-nf7_51)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_61-nf7_51)], bl
|
|
mov [edx+(nf7_62-nf7_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_63-nf7_51)], bl
|
|
mov [edx+(nf7_64-nf7_51)], bh
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_71-nf7_51)], bl
|
|
mov [edx+(nf7_72-nf7_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_73-nf7_51)], bl
|
|
mov [edx+(nf7_74-nf7_51)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf7_81-nf7_51)], bl
|
|
mov [edx+(nf7_82-nf7_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf7_83-nf7_51)], bl
|
|
mov [edx+(nf7_84-nf7_51)], bh
|
|
|
|
push ebp
|
|
push esi
|
|
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
; With c0 = color at [esi] and c1 = color at [esi+2], written high:low
; word: ecx = c1:c0, edx = c0:c1, ebx = c0:c0, ebp = c1:c1.
if TRANS16
|
|
Trans16 cx, esi+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi
|
|
else
|
|
mov ecx, [esi]
|
|
endif
|
|
mov esi,nf_width
|
|
mov edx, ecx
|
|
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
jmp nf7_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf7_0:
|
|
; Stores below are templates; the byte after the opcode is rewritten above.
nf7_11: mov [ebp+0], ebx
|
|
nf7_12: mov [ebp+4], ebx
|
|
nf7_13: mov [ebp+8], ebx
|
|
nf7_14: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_21: mov [ebp+0], ebx
|
|
nf7_22: mov [ebp+4], ebx
|
|
nf7_23: mov [ebp+8], ebx
|
|
nf7_24: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_31: mov [ebp+0], ebx
|
|
nf7_32: mov [ebp+4], ebx
|
|
nf7_33: mov [ebp+8], ebx
|
|
nf7_34: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_41: mov [ebp+0], ebx
|
|
nf7_42: mov [ebp+4], ebx
|
|
nf7_43: mov [ebp+8], ebx
|
|
nf7_44: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_51: mov [ebp+0], ebx
|
|
nf7_52: mov [ebp+4], ebx
|
|
nf7_53: mov [ebp+8], ebx
|
|
nf7_54: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_61: mov [ebp+0], ebx
|
|
nf7_62: mov [ebp+4], ebx
|
|
nf7_63: mov [ebp+8], ebx
|
|
nf7_64: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_71: mov [ebp+0], ebx
|
|
nf7_72: mov [ebp+4], ebx
|
|
nf7_73: mov [ebp+8], ebx
|
|
nf7_74: mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf7_81: mov [ebp+0], ebx
|
|
nf7_82: mov [ebp+4], ebx
|
|
nf7_83: mov [ebp+8], ebx
|
|
nf7_84: mov [ebp+12], ebx
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 12
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf7+16
|
|
; nf23 -- low-resolution form of opcode 7 (entered from nf7 when bit 15
; of the first color word is set): 4x4 cells, 1 bit per cell, two colors
; (6 source bytes: colors at [esi] and [esi+2], bits at [esi+4..5]).
; Each nibble of the bits bytes describes one row of four cells; every
; cell is stored as one dword in two consecutive output rows.
; Self-modifying: the nfhpk_mov4l entry for a nibble overwrites byte +1
; of that row's four "mov eax, ebx" instructions, choosing ebx or ecx.
; Out: esi advanced by 6; edi moved down seven rows and then reduced by
;      nfpk_back_right.  eax, ebx, ecx, edx are overwritten.
nf23: ; low 4x4x1 (6 bytes)
|
|
|
|
if 0 ;debug
|
|
add esi, 6
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfhpk_mov4l
|
|
lea edx, byte ptr ds:nf23_11+1 ; edx -> byte to patch in the first mov
|
|
|
|
mov al, [esi+4]
|
|
and al, 0fH ; low nibble: first cell row
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf23_11-nf23_11)], bl
|
|
mov [edx+(nf23_12-nf23_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf23_13-nf23_11)], bl
|
|
mov [edx+(nf23_14-nf23_11)], bh
|
|
|
|
mov al, [esi+4]
|
|
shr al, 4 ; high nibble: second cell row
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf23_31-nf23_11)], bl
|
|
mov [edx+(nf23_32-nf23_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf23_33-nf23_11)], bl
|
|
mov [edx+(nf23_34-nf23_11)], bh
|
|
|
|
|
|
mov al, [esi+5]
|
|
and al, 0fH ; low nibble: third cell row
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf23_51-nf23_11)], bl
|
|
mov [edx+(nf23_52-nf23_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf23_53-nf23_11)], bl
|
|
mov [edx+(nf23_54-nf23_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
shr al, 4 ; high nibble: fourth cell row
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf23_71-nf23_11)], bl
|
|
mov [edx+(nf23_72-nf23_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf23_73-nf23_11)], bl
|
|
mov [edx+(nf23_74-nf23_11)], bh
|
|
|
|
mov edx, nf_width
|
|
|
|
; load ebx,ecx with 00,11 color combinations
|
|
; (ebx = first color in both words, ecx = second color in both words)
if TRANS16
|
|
Trans16 cx, esi, 1
|
|
shrd ebx, ecx, 16
|
|
mov bx, cx
|
|
Trans16 cx, esi+2
|
|
shrd eax, ecx, 16
|
|
mov ax, cx
|
|
mov ecx, eax
|
|
else
|
|
mov ebx, [esi]
|
|
and ebx, 07FFF7FFFh
|
|
mov ecx, ebx
|
|
ror ebx, 16
|
|
xchg bx,cx
|
|
endif
|
|
|
|
jmp nf23_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf23_0:
|
|
|
|
nf23_11:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
nf23_12:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
nf23_13:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
nf23_14:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf23_31:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
nf23_32:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
nf23_33:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
nf23_34:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf23_51:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
nf23_52:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
nf23_53:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
nf23_54:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
lea edi, [edi+edx*2]
|
|
|
|
nf23_71:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
nf23_72:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
nf23_73:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
nf23_74:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
add edi, edx
|
|
|
|
sub edi, nfpk_back_right
|
|
add esi, 6
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf8 -- opcode 8: four quadrants, each with its own two colors and
; 1 bit per pixel (24 source bytes: four 6-byte groups of two 16-bit
; colors followed by two bytes of pixel bits).  If bit 15 of the first
; color word is set, the data is handled by nf24/nf40 instead.
; Drawing order: group at esi+0 -> rows 0-3 of the left 8-byte column,
; esi+6 -> rows 4-7 of that column, then edi moves up eight rows and right
; 8 bytes, esi+12 -> rows 0-3 of the right column, esi+18 -> rows 4-7.
; Each bits byte covers two rows of the quadrant.
; Self-modifying: nfhpk_mov8 entries overwrite byte +1 of the stores.
; Out: esi advanced by 24; edi adjusted by -8 and -nfpk_back_right after
;      the last row.  ebp and esi are saved on the stack meanwhile.
nf8: ; 2x2 4x4x1 (24 bytes)
|
|
|
|
test word ptr [esi], 08000h
|
|
jnz nf24
|
|
|
|
if 0 ;debug
|
|
add esi, 24
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfhpk_mov8
|
|
lea edx, byte ptr ds:nf8_11+1 ; edx -> byte to patch in the first store
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_11-nf8_11)], bl
|
|
mov [edx+(nf8_12-nf8_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_13-nf8_11)], bl
|
|
mov [edx+(nf8_14-nf8_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_21-nf8_11)], bl
|
|
mov [edx+(nf8_22-nf8_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_23-nf8_11)], bl
|
|
mov [edx+(nf8_24-nf8_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_31-nf8_11)], bl
|
|
mov [edx+(nf8_32-nf8_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_33-nf8_11)], bl
|
|
mov [edx+(nf8_34-nf8_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_41-nf8_11)], bl
|
|
mov [edx+(nf8_42-nf8_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_43-nf8_11)], bl
|
|
mov [edx+(nf8_44-nf8_11)], bh
|
|
|
|
add edx, nf8_51-nf8_11 ; rebase the patch pointer on nf8_51
|
|
|
|
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_51-nf8_51)], bl
|
|
mov [edx+(nf8_52-nf8_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_53-nf8_51)], bl
|
|
mov [edx+(nf8_54-nf8_51)], bh
|
|
|
|
mov al, [esi+17]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_61-nf8_51)], bl
|
|
mov [edx+(nf8_62-nf8_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_63-nf8_51)], bl
|
|
mov [edx+(nf8_64-nf8_51)], bh
|
|
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_71-nf8_51)], bl
|
|
mov [edx+(nf8_72-nf8_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_73-nf8_51)], bl
|
|
mov [edx+(nf8_74-nf8_51)], bh
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf8_81-nf8_51)], bl
|
|
mov [edx+(nf8_82-nf8_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf8_83-nf8_51)], bl
|
|
mov [edx+(nf8_84-nf8_51)], bh
|
|
|
|
|
|
push ebp
|
|
push esi
|
|
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
; With TRANS16 the color pairs of the later groups are translated now
; and pushed in reverse order, so the pops below yield the groups at
; esi+6, esi+12 and esi+18 in that order.
if TRANS16
|
|
Trans16 cx, esi+18+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi+18
|
|
push ecx
|
|
|
|
Trans16 cx, esi+12+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi+12
|
|
push ecx
|
|
|
|
Trans16 cx, esi+6+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi+6
|
|
push ecx
|
|
|
|
Trans16 cx, esi+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi
|
|
else
|
|
mov ecx, [esi]
|
|
endif
|
|
mov esi,nf_width
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
jmp nf8_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf8_0:
|
|
nf8_11: mov [ebp+0], ebx
|
|
nf8_12: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_13: mov [ebp+0], ebx
|
|
nf8_14: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf8_21: mov [ebp+0], ebx
|
|
nf8_22: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_23: mov [ebp+0], ebx
|
|
nf8_24: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
; colors of the group at +6
if TRANS16
|
|
pop ecx
|
|
else
|
|
mov eax, [esp]
|
|
mov ecx, [eax+6]
|
|
endif
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
|
|
nf8_31: mov [ebp+0], ebx
|
|
nf8_32: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_33: mov [ebp+0], ebx
|
|
nf8_34: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf8_41: mov [ebp+0], ebx
|
|
nf8_42: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_43: mov [ebp+0], ebx
|
|
nf8_44: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
lea eax, [esi*8-8]
|
|
sub edi, eax ; up eight rows, right 8 bytes
|
|
|
|
; colors of the group at +12
if TRANS16
|
|
pop ecx
|
|
else
|
|
mov eax, [esp]
|
|
mov ecx, [eax+12]
|
|
endif
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
nf8_51: mov [ebp+0], ebx
|
|
nf8_52: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_53: mov [ebp+0], ebx
|
|
nf8_54: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf8_61: mov [ebp+0], ebx
|
|
nf8_62: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_63: mov [ebp+0], ebx
|
|
nf8_64: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
; colors of the group at +18
if TRANS16
|
|
pop ecx
|
|
else
|
|
mov eax, [esp]
|
|
mov ecx, [eax+18]
|
|
endif
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
nf8_71: mov [ebp+0], ebx
|
|
nf8_72: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_73: mov [ebp+0], ebx
|
|
nf8_74: mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf8_81: mov [ebp+0], ebx
|
|
nf8_82: mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf8_83: mov [ebp+0], ebx
|
|
nf8_84: mov [ebp+4], ebx
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 24
|
|
sub edi, 8 ; undo the 8-byte step to the right column
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+16
|
|
; nf24 -- variant of opcode 8 (entered from nf8 when bit 15 of the first
; color word is set): two side-by-side halves, each with its own two
; colors and 1 bit per pixel (16 source bytes: two 8-byte groups of two
; 16-bit colors followed by four bytes of pixel bits).  If bit 15 of the
; word at [esi+8] is also set, the data is handled by nf40 instead.
; The group at esi+0 fills all eight rows of the left 8-byte column, then
; edi moves up eight rows and right 8 bytes and the group at esi+8 fills
; the right column.  Each bits byte covers two rows of its column.
; Self-modifying: nfhpk_mov8 entries overwrite byte +1 of the stores.
; Out: esi advanced by 16; edi adjusted by -8 and -nfpk_back_right after
;      the last row.  ebp and esi are saved on the stack meanwhile.
nf24: ; 2x1 4x8x1 (16 bytes)
|
|
|
|
test word ptr [esi+8], 08000h
|
|
jnz nf40
|
|
|
|
if 0 ;debug
|
|
add esi, 16
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfhpk_mov8
|
|
lea edx, byte ptr ds:nf24_11+1 ; edx -> byte to patch in the first store
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_11-nf24_11)], bl
|
|
mov [edx+(nf24_12-nf24_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_13-nf24_11)], bl
|
|
mov [edx+(nf24_14-nf24_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_21-nf24_11)], bl
|
|
mov [edx+(nf24_22-nf24_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_23-nf24_11)], bl
|
|
mov [edx+(nf24_24-nf24_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_31-nf24_11)], bl
|
|
mov [edx+(nf24_32-nf24_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_33-nf24_11)], bl
|
|
mov [edx+(nf24_34-nf24_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_41-nf24_11)], bl
|
|
mov [edx+(nf24_42-nf24_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_43-nf24_11)], bl
|
|
mov [edx+(nf24_44-nf24_11)], bh
|
|
|
|
add edx, nf24_51-nf24_11 ; rebase the patch pointer on nf24_51
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_51-nf24_51)], bl
|
|
mov [edx+(nf24_52-nf24_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_53-nf24_51)], bl
|
|
mov [edx+(nf24_54-nf24_51)], bh
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_61-nf24_51)], bl
|
|
mov [edx+(nf24_62-nf24_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_63-nf24_51)], bl
|
|
mov [edx+(nf24_64-nf24_51)], bh
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_71-nf24_51)], bl
|
|
mov [edx+(nf24_72-nf24_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_73-nf24_51)], bl
|
|
mov [edx+(nf24_74-nf24_51)], bh
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf24_81-nf24_51)], bl
|
|
mov [edx+(nf24_82-nf24_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf24_83-nf24_51)], bl
|
|
mov [edx+(nf24_84-nf24_51)], bh
|
|
|
|
|
|
push ebp
|
|
push esi
|
|
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
; With TRANS16 the right half's color pair is translated now and pushed
; for the pop further down.
if TRANS16
|
|
Trans16 cx, esi+8+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi+8
|
|
push ecx
|
|
|
|
Trans16 cx, esi+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi, 1
|
|
else
|
|
mov ecx, [esi]
|
|
and ecx, 07FFF7FFFh
|
|
endif
|
|
mov esi,nf_width
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
jmp nf24_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf24_0:
|
|
|
|
nf24_11:mov [ebp+0], ebx
|
|
nf24_12:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_13:mov [ebp+0], ebx
|
|
nf24_14:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf24_21:mov [ebp+0], ebx
|
|
nf24_22:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_23:mov [ebp+0], ebx
|
|
nf24_24:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf24_31:mov [ebp+0], ebx
|
|
nf24_32:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_33:mov [ebp+0], ebx
|
|
nf24_34:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf24_41:mov [ebp+0], ebx
|
|
nf24_42:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_43:mov [ebp+0], ebx
|
|
nf24_44:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
lea eax, [esi*8-8]
|
|
sub edi, eax ; up eight rows, right 8 bytes
|
|
|
|
; colors of the right half
if TRANS16
|
|
pop ecx
|
|
else
|
|
mov eax, [esp]
|
|
mov ecx, [eax+8]
|
|
endif
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
nf24_51:mov [ebp+0], ebx
|
|
nf24_52:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_53:mov [ebp+0], ebx
|
|
nf24_54:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf24_61:mov [ebp+0], ebx
|
|
nf24_62:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_63:mov [ebp+0], ebx
|
|
nf24_64:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf24_71:mov [ebp+0], ebx
|
|
nf24_72:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_73:mov [ebp+0], ebx
|
|
nf24_74:mov [ebp+4], ebx
|
|
add edi, esi
|
|
|
|
nf24_81:mov [ebp+0], ebx
|
|
nf24_82:mov [ebp+4], ebx
|
|
add edi, esi
|
|
nf24_83:mov [ebp+0], ebx
|
|
nf24_84:mov [ebp+4], ebx
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 16
|
|
sub edi, 8 ; undo the 8-byte step to the right column
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+32
|
|
; nf40 -- variant of opcode 8 (entered from nf24 when bit 15 of the word
; at [esi+8] is set): two stacked halves, each with its own two colors
; and 1 bit per pixel (16 source bytes: two 8-byte groups of two 16-bit
; colors followed by four bytes of pixel bits).
; The group at esi+0 fills the first four rows, the group at esi+8 the
; last four; each bits byte covers one full 16-byte row.
; Self-modifying: nfhpk_mov8 entries overwrite byte +1 of the stores.
; Out: esi advanced by 16; edi moved down seven rows and then reduced by
;      nfpk_back_right.  ebp and esi are saved on the stack meanwhile.
nf40: ; 1x2 8x4x1 (16 bytes)
|
|
|
|
if 0 ;debug
|
|
add esi, 16
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfhpk_mov8
|
|
lea edx, byte ptr ds:nf40_11+1 ; edx -> byte to patch in the first store
|
|
|
|
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_11-nf40_11)], bl
|
|
mov [edx+(nf40_12-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_13-nf40_11)], bl
|
|
mov [edx+(nf40_14-nf40_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_21-nf40_11)], bl
|
|
mov [edx+(nf40_22-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_23-nf40_11)], bl
|
|
mov [edx+(nf40_24-nf40_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_31-nf40_11)], bl
|
|
mov [edx+(nf40_32-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_33-nf40_11)], bl
|
|
mov [edx+(nf40_34-nf40_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_41-nf40_11)], bl
|
|
mov [edx+(nf40_42-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_43-nf40_11)], bl
|
|
mov [edx+(nf40_44-nf40_11)], bh
|
|
|
|
add edx, nf40_51-nf40_11 ; rebase the patch pointer on nf40_51
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_51-nf40_51)], bl
|
|
mov [edx+(nf40_52-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_53-nf40_51)], bl
|
|
mov [edx+(nf40_54-nf40_51)], bh
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_61-nf40_51)], bl
|
|
mov [edx+(nf40_62-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_63-nf40_51)], bl
|
|
mov [edx+(nf40_64-nf40_51)], bh
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_71-nf40_51)], bl
|
|
mov [edx+(nf40_72-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_73-nf40_51)], bl
|
|
mov [edx+(nf40_74-nf40_51)], bh
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_81-nf40_51)], bl
|
|
mov [edx+(nf40_82-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_83-nf40_51)], bl
|
|
mov [edx+(nf40_84-nf40_51)], bh
|
|
|
|
|
|
push ebp
|
|
push esi
|
|
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
; With TRANS16 the lower half's color pair is translated now and pushed
; for the pop further down.
if TRANS16
|
|
Trans16 cx, esi+8+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi+8, 1
|
|
push ecx
|
|
|
|
Trans16 cx, esi+2
|
|
shl ecx, 16
|
|
Trans16 cx, esi, 1
|
|
else
|
|
mov ecx, [esi]
|
|
and ecx, 07FFF7FFFh
|
|
endif
|
|
mov esi,nf_width
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
jmp nf40_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf40_0:
|
|
|
|
nf40_11:mov [ebp+0], ebx
|
|
nf40_12:mov [ebp+4], ebx
|
|
nf40_13:mov [ebp+8], ebx
|
|
nf40_14:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_21:mov [ebp+0], ebx
|
|
nf40_22:mov [ebp+4], ebx
|
|
nf40_23:mov [ebp+8], ebx
|
|
nf40_24:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_31:mov [ebp+0], ebx
|
|
nf40_32:mov [ebp+4], ebx
|
|
nf40_33:mov [ebp+8], ebx
|
|
nf40_34:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_41:mov [ebp+0], ebx
|
|
nf40_42:mov [ebp+4], ebx
|
|
nf40_43:mov [ebp+8], ebx
|
|
nf40_44:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
; colors of the lower half
if TRANS16
|
|
pop ecx
|
|
else
|
|
mov eax, [esp]
|
|
mov ecx, [eax+8]
|
|
and ecx, 07FFF7FFFh
|
|
endif
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
nf40_51:mov [ebp+0], ebx
|
|
nf40_52:mov [ebp+4], ebx
|
|
nf40_53:mov [ebp+8], ebx
|
|
nf40_54:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_61:mov [ebp+0], ebx
|
|
nf40_62:mov [ebp+4], ebx
|
|
nf40_63:mov [ebp+8], ebx
|
|
nf40_64:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_71:mov [ebp+0], ebx
|
|
nf40_72:mov [ebp+4], ebx
|
|
nf40_73:mov [ebp+8], ebx
|
|
nf40_74:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_81:mov [ebp+0], ebx
|
|
nf40_82:mov [ebp+4], ebx
|
|
nf40_83:mov [ebp+8], ebx
|
|
nf40_84:mov [ebp+12], ebx
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 16
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf9: ; 8x8x2 (24 bytes)
|
|
|
|
test word ptr [esi], 08000h
|
|
jnz nf41
|
|
|
|
test word ptr [esi+4], 08000h
|
|
jnz nf25
|
|
|
|
if 0 ;debug
|
|
add esi, 24
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfhpk_mov4
|
|
lea edx, byte ptr ds:nf9_11+2
|
|
|
|
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_11-nf9_11)], bh
|
|
mov [edx+(nf9_12-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_13-nf9_11)], bh
|
|
mov [edx+(nf9_14-nf9_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_15-nf9_11)], bh
|
|
mov [edx+(nf9_16-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_17-nf9_11)], bh
|
|
mov [edx+(nf9_18-nf9_11)], bl
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_21-nf9_11)], bh
|
|
mov [edx+(nf9_22-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_23-nf9_11)], bh
|
|
mov [edx+(nf9_24-nf9_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_25-nf9_11)], bh
|
|
mov [edx+(nf9_26-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_27-nf9_11)], bh
|
|
mov [edx+(nf9_28-nf9_11)], bl
|
|
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_31-nf9_11)], bh
|
|
mov [edx+(nf9_32-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_33-nf9_11)], bh
|
|
mov [edx+(nf9_34-nf9_11)], bl
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_35-nf9_11)], bh
|
|
mov [edx+(nf9_36-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_37-nf9_11)], bh
|
|
mov [edx+(nf9_38-nf9_11)], bl
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_41-nf9_11)], bh
|
|
mov [edx+(nf9_42-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_43-nf9_11)], bh
|
|
mov [edx+(nf9_44-nf9_11)], bl
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_45-nf9_11)], bh
|
|
mov [edx+(nf9_46-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_47-nf9_11)], bh
|
|
mov [edx+(nf9_48-nf9_11)], bl
|
|
|
|
|
|
lea edx, [edx+(nf9_51-nf9_11)]
|
|
|
|
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_51-nf9_51)], bh
|
|
mov [edx+(nf9_52-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_53-nf9_51)], bh
|
|
mov [edx+(nf9_54-nf9_51)], bl
|
|
|
|
mov al, [esi+17]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_55-nf9_51)], bh
|
|
mov [edx+(nf9_56-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_57-nf9_51)], bh
|
|
mov [edx+(nf9_58-nf9_51)], bl
|
|
|
|
|
|
mov al, [esi+18]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_61-nf9_51)], bh
|
|
mov [edx+(nf9_62-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_63-nf9_51)], bh
|
|
mov [edx+(nf9_64-nf9_51)], bl
|
|
|
|
mov al, [esi+19]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_65-nf9_51)], bh
|
|
mov [edx+(nf9_66-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_67-nf9_51)], bh
|
|
mov [edx+(nf9_68-nf9_51)], bl
|
|
|
|
|
|
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_71-nf9_51)], bh
|
|
mov [edx+(nf9_72-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_73-nf9_51)], bh
|
|
mov [edx+(nf9_74-nf9_51)], bl
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_75-nf9_51)], bh
|
|
mov [edx+(nf9_76-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_77-nf9_51)], bh
|
|
mov [edx+(nf9_78-nf9_51)], bl
|
|
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_81-nf9_51)], bh
|
|
mov [edx+(nf9_82-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_83-nf9_51)], bh
|
|
mov [edx+(nf9_84-nf9_51)], bl
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_85-nf9_51)], bh
|
|
mov [edx+(nf9_86-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_87-nf9_51)], bh
|
|
mov [edx+(nf9_88-nf9_51)], bl
|
|
|
|
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
|
|
if TRANS16
|
|
Trans16 bx, esi
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4
|
|
Trans16 bp, esi+6
|
|
else
|
|
mov bx, [esi]
|
|
mov dx, [esi+2]
|
|
mov cx, [esi+4]
|
|
mov bp, [esi+6]
|
|
endif
|
|
mov esi, nf_width
|
|
|
|
jmp nf9_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf9_0:
|
|
|
|
nf9_11: mov ax, bx
|
|
shl eax, 16
|
|
nf9_12: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_13: mov ax, bx
|
|
shl eax, 16
|
|
nf9_14: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_15: mov ax, bx
|
|
shl eax, 16
|
|
nf9_16: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_17: mov ax, bx
|
|
shl eax, 16
|
|
nf9_18: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_21: mov ax, bx
|
|
shl eax, 16
|
|
nf9_22: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_23: mov ax, bx
|
|
shl eax, 16
|
|
nf9_24: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_25: mov ax, bx
|
|
shl eax, 16
|
|
nf9_26: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_27: mov ax, bx
|
|
shl eax, 16
|
|
nf9_28: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_31: mov ax, bx
|
|
shl eax, 16
|
|
nf9_32: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_33: mov ax, bx
|
|
shl eax, 16
|
|
nf9_34: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_35: mov ax, bx
|
|
shl eax, 16
|
|
nf9_36: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_37: mov ax, bx
|
|
shl eax, 16
|
|
nf9_38: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_41: mov ax, bx
|
|
shl eax, 16
|
|
nf9_42: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_43: mov ax, bx
|
|
shl eax, 16
|
|
nf9_44: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_45: mov ax, bx
|
|
shl eax, 16
|
|
nf9_46: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_47: mov ax, bx
|
|
shl eax, 16
|
|
nf9_48: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_51: mov ax, bx
|
|
shl eax, 16
|
|
nf9_52: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_53: mov ax, bx
|
|
shl eax, 16
|
|
nf9_54: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_55: mov ax, bx
|
|
shl eax, 16
|
|
nf9_56: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_57: mov ax, bx
|
|
shl eax, 16
|
|
nf9_58: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_61: mov ax, bx
|
|
shl eax, 16
|
|
nf9_62: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_63: mov ax, bx
|
|
shl eax, 16
|
|
nf9_64: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_65: mov ax, bx
|
|
shl eax, 16
|
|
nf9_66: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_67: mov ax, bx
|
|
shl eax, 16
|
|
nf9_68: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_71: mov ax, bx
|
|
shl eax, 16
|
|
nf9_72: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_73: mov ax, bx
|
|
shl eax, 16
|
|
nf9_74: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_75: mov ax, bx
|
|
shl eax, 16
|
|
nf9_76: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_77: mov ax, bx
|
|
shl eax, 16
|
|
nf9_78: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_81: mov ax, bx
|
|
shl eax, 16
|
|
nf9_82: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_83: mov ax, bx
|
|
shl eax, 16
|
|
nf9_84: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_85: mov ax, bx
|
|
shl eax, 16
|
|
nf9_86: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_87: mov ax, bx
|
|
shl eax, 16
|
|
nf9_88: mov ax, bx
|
|
mov [edi+12], eax
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 24
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+16
|
|
nf25: ; low 4x4x2 (12 bytes)
; Block handler: paints an 8-row by 16-byte region at edi from 12 bytes at esi.
;   [esi+0..7]   four 16-bit colour words
;   [esi+8..11]  four selector bytes, each used as an index into the dword
;                table nfhpk_mov4
; Self-modifying: the four bytes of each table entry are written over the byte
; at offset 1 of four consecutive `mov eax, ebx` placeholders (nf25_r1..nf25_r4).
; NOTE(review): that byte is presumably the ModRM byte, so the table picks
; which of ebx/edx/ecx/ebp each placeholder copies -- confirm against
; nfhpk_mov4, which is defined outside this section.
; Each placeholder's eax is stored to the same 4-byte column of two consecutive
; rows (row pitch = nf_width), so one selector byte covers one row pair.
; In:  esi = source data, edi = destination
; Out: esi advanced by 12; edi moved to the last row, then reduced by
;      nfpk_back_right; ebp preserved by push/pop; eax, ebx, ecx, edx overwritten.
|
|
|
|
if 0 ;debug
|
|
add esi, 12
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfhpk_mov4
|
|
lea edx, byte ptr ds:nf25_11+1
|
|
|
|
; Patch pass: one selector byte per row pair; table bytes go to the
; placeholders in order bl, bh, then (after the shift) bl, bh again.
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_11-nf25_11)], bl
|
|
mov [edx+(nf25_12-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_13-nf25_11)], bl
|
|
mov [edx+(nf25_14-nf25_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_21-nf25_11)], bl
|
|
mov [edx+(nf25_22-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_23-nf25_11)], bl
|
|
mov [edx+(nf25_24-nf25_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_31-nf25_11)], bl
|
|
mov [edx+(nf25_32-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_33-nf25_11)], bl
|
|
mov [edx+(nf25_34-nf25_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_41-nf25_11)], bl
|
|
mov [edx+(nf25_42-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_43-nf25_11)], bl
|
|
mov [edx+(nf25_44-nf25_11)], bh
|
|
|
|
push ebp
|
|
push esi
|
|
; Load ebx,edx,ecx,ebp with four colors, duplicated in high order.
; The third colour ([esi+4]) is the only one masked: `and eax, 07fffh` in the
; plain path, extra argument 1 to Trans16 in the other.
; NOTE(review): Trans16 is a macro defined elsewhere; its third argument
; presumably requests the same bit-15 strip -- confirm.
|
|
if TRANS16
|
|
Trans16 cx, esi
|
|
shrd ebx, ecx, 16
|
|
mov bx, cx
|
|
Trans16 cx, esi+2
|
|
shrd edx, ecx, 16
|
|
mov dx, cx
|
|
Trans16 cx, esi+4, 1
|
|
shrd eax, ecx, 16
|
|
mov ax, cx
|
|
push eax
|
|
Trans16 cx, esi+6
|
|
shrd ebp, ecx, 16
|
|
mov bp, cx
|
|
pop ecx
|
|
else
|
|
mov ax, [esi]
|
|
shrd ebx, eax, 16
|
|
mov bx, ax
|
|
mov ax, [esi+2]
|
|
shrd edx, eax, 16
|
|
mov dx, ax
|
|
mov ax, [esi+4]
|
|
and eax, 07fffh
|
|
shrd ecx, eax, 16
|
|
mov cx, ax
|
|
mov ax, [esi+6]
|
|
shrd ebp, eax, 16
|
|
mov bp, ax
|
|
endif
|
|
mov esi, nf_width
|
|
|
|
jmp nf25_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf25_0:
|
|
|
|
; Paint pass: every labelled `mov eax, ebx` below was patched above; do not
; change its encoding. esi now holds the row pitch.
nf25_11:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf25_12:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf25_13:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf25_14:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf25_21:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf25_22:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf25_23:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf25_24:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf25_31:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf25_32:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf25_33:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf25_34:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf25_41:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf25_42:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf25_43:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf25_44:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
|
|
; Step onto the last row written so edi is 7 rows below its entry value.
add edi, esi
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 12
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+32
|
|
nf41: ; low 4x8x2 (16 bytes)
; Block handler: paints an 8-row by 16-byte region at edi from 16 bytes at esi.
; Falls through only when bit 15 of the word at [esi+4] is clear; otherwise
; control goes to nf57.
;   [esi+0..7]   four 16-bit colour words (the first is masked with 07fffh)
;   [esi+8..15]  eight selector bytes, one per row, each an index into the
;                dword table nfhpk_mov8
; Self-modifying: the four bytes of each table entry are written over the byte
; at offset 1 of that row's four `mov [ebp+N], ebx` placeholders.
; NOTE(review): as assembled the placeholders store through ebp, which holds a
; colour during the paint pass; the patched byte presumably rewrites the ModRM
; so the store goes through edi with one of ebx/edx/ecx/ebp as the source --
; confirm against nfhpk_mov8, which is defined outside this section.
; In:  esi = source data, edi = destination
; Out: esi advanced by 16; edi moved down 7 rows, then reduced by
;      nfpk_back_right; ebp preserved by push/pop; eax, ebx, ecx, edx overwritten.
|
|
test word ptr [esi+4], 08000h
|
|
jnz nf57
|
|
|
|
if 0 ;debug
|
|
add esi, 16
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfhpk_mov8
|
|
lea edx, byte ptr ds:nf41_11+1
|
|
|
|
; Patch pass, rows 1-4 (offsets relative to nf41_11).
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_11-nf41_11)], bl
|
|
mov [edx+(nf41_12-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_13-nf41_11)], bl
|
|
mov [edx+(nf41_14-nf41_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_21-nf41_11)], bl
|
|
mov [edx+(nf41_22-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_23-nf41_11)], bl
|
|
mov [edx+(nf41_24-nf41_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_31-nf41_11)], bl
|
|
mov [edx+(nf41_32-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_33-nf41_11)], bl
|
|
mov [edx+(nf41_34-nf41_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_41-nf41_11)], bl
|
|
mov [edx+(nf41_42-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_43-nf41_11)], bl
|
|
mov [edx+(nf41_44-nf41_11)], bh
|
|
|
|
; Re-base edx on nf41_51 so rows 5-8 are addressed relative to that label.
lea edx, [edx+(nf41_51-nf41_11)]
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_51-nf41_51)], bl
|
|
mov [edx+(nf41_52-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_53-nf41_51)], bl
|
|
mov [edx+(nf41_54-nf41_51)], bh
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_61-nf41_51)], bl
|
|
mov [edx+(nf41_62-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_63-nf41_51)], bl
|
|
mov [edx+(nf41_64-nf41_51)], bh
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_71-nf41_51)], bl
|
|
mov [edx+(nf41_72-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_73-nf41_51)], bl
|
|
mov [edx+(nf41_74-nf41_51)], bh
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_81-nf41_51)], bl
|
|
mov [edx+(nf41_82-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_83-nf41_51)], bl
|
|
mov [edx+(nf41_84-nf41_51)], bh
|
|
|
|
push ebp
|
|
push esi
|
|
; Load ebx,edx,ecx,ebp with four colors, duplicated in high order.
; Only the first colour ([esi]) is masked here: `and eax, 07fffh` in the plain
; path, extra argument 1 to Trans16 in the other.
; NOTE(review): Trans16 is a macro defined elsewhere; its third argument
; presumably requests the same bit-15 strip -- confirm.
|
|
if TRANS16
|
|
Trans16 cx, esi, 1
|
|
shrd ebx, ecx, 16
|
|
mov bx, cx
|
|
Trans16 cx, esi+2
|
|
shrd edx, ecx, 16
|
|
mov dx, cx
|
|
Trans16 cx, esi+4
|
|
shrd eax, ecx, 16
|
|
mov ax, cx
|
|
push eax
|
|
Trans16 cx, esi+6
|
|
shrd ebp, ecx, 16
|
|
mov bp, cx
|
|
pop ecx
|
|
else
|
|
mov ax, [esi]
|
|
and eax, 07fffh
|
|
shrd ebx, eax, 16
|
|
mov bx, ax
|
|
mov ax, [esi+2]
|
|
shrd edx, eax, 16
|
|
mov dx, ax
|
|
mov ax, [esi+4]
|
|
shrd ecx, eax, 16
|
|
mov cx, ax
|
|
mov ax, [esi+6]
|
|
shrd ebp, eax, 16
|
|
mov bp, ax
|
|
endif
|
|
mov esi, nf_width
|
|
|
|
jmp nf41_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf41_0:
|
|
|
|
; Paint pass: every labelled store below was patched above; do not change its
; encoding. One row per group, edi advanced by the row pitch in esi.
nf41_11:mov [ebp+0], ebx
|
|
nf41_12:mov [ebp+4], ebx
|
|
nf41_13:mov [ebp+8], ebx
|
|
nf41_14:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_21:mov [ebp+0], ebx
|
|
nf41_22:mov [ebp+4], ebx
|
|
nf41_23:mov [ebp+8], ebx
|
|
nf41_24:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_31:mov [ebp+0], ebx
|
|
nf41_32:mov [ebp+4], ebx
|
|
nf41_33:mov [ebp+8], ebx
|
|
nf41_34:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_41:mov [ebp+0], ebx
|
|
nf41_42:mov [ebp+4], ebx
|
|
nf41_43:mov [ebp+8], ebx
|
|
nf41_44:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_51:mov [ebp+0], ebx
|
|
nf41_52:mov [ebp+4], ebx
|
|
nf41_53:mov [ebp+8], ebx
|
|
nf41_54:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_61:mov [ebp+0], ebx
|
|
nf41_62:mov [ebp+4], ebx
|
|
nf41_63:mov [ebp+8], ebx
|
|
nf41_64:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_71:mov [ebp+0], ebx
|
|
nf41_72:mov [ebp+4], ebx
|
|
nf41_73:mov [ebp+8], ebx
|
|
nf41_74:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_81:mov [ebp+0], ebx
|
|
nf41_82:mov [ebp+4], ebx
|
|
nf41_83:mov [ebp+8], ebx
|
|
nf41_84:mov [ebp+12], ebx
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 16
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+48
|
|
nf57: ; low 8x4x2 (16 bytes)
; Block handler: paints an 8-row by 16-byte region at edi from 16 bytes at esi.
; Reached from nf41 when bit 15 of the word at [esi+4] is set.
;   [esi+0..7]   four 16-bit colour words (the first and third are masked
;                with 07fffh)
;   [esi+8..15]  eight selector bytes, two per row pair, each an index into
;                the dword table nfhpk_mov4
; Self-modifying: the four bytes of each table entry are written over the byte
; at offset 2 of four consecutive `mov ax, bx` placeholders.
; NOTE(review): that byte is presumably the ModRM byte following the operand-
; size prefix and opcode, so the table picks which of bx/dx/cx/bp is copied --
; confirm against nfhpk_mov4, which is defined outside this section.
; Two placeholders build one dword: the first lands in the high half (it is
; shifted left by 16), the second in the low half. That is why the patch pass
; writes bh to the first label of a pair and bl to the second.
; Each dword is stored to the same column of two consecutive rows.
; In:  esi = source data, edi = destination
; Out: esi advanced by 16; edi moved to the last row, then reduced by
;      nfpk_back_right; ebp preserved by push/pop; eax, ebx, ecx, edx overwritten.
|
|
if 0 ;debug
|
|
add esi, 16
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfhpk_mov4
|
|
lea edx, byte ptr ds:nf57_11+2
|
|
|
|
; Patch pass: selector bytes [esi+8..15], four placeholders per byte.
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_11-nf57_11)], bh
|
|
mov [edx+(nf57_12-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_13-nf57_11)], bh
|
|
mov [edx+(nf57_14-nf57_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_15-nf57_11)], bh
|
|
mov [edx+(nf57_16-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_17-nf57_11)], bh
|
|
mov [edx+(nf57_18-nf57_11)], bl
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_21-nf57_11)], bh
|
|
mov [edx+(nf57_22-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_23-nf57_11)], bh
|
|
mov [edx+(nf57_24-nf57_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_25-nf57_11)], bh
|
|
mov [edx+(nf57_26-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_27-nf57_11)], bh
|
|
mov [edx+(nf57_28-nf57_11)], bl
|
|
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_31-nf57_11)], bh
|
|
mov [edx+(nf57_32-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_33-nf57_11)], bh
|
|
mov [edx+(nf57_34-nf57_11)], bl
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_35-nf57_11)], bh
|
|
mov [edx+(nf57_36-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_37-nf57_11)], bh
|
|
mov [edx+(nf57_38-nf57_11)], bl
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_41-nf57_11)], bh
|
|
mov [edx+(nf57_42-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_43-nf57_11)], bh
|
|
mov [edx+(nf57_44-nf57_11)], bl
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_45-nf57_11)], bh
|
|
mov [edx+(nf57_46-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_47-nf57_11)], bh
|
|
mov [edx+(nf57_48-nf57_11)], bl
|
|
|
|
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
; NOTE(review): Trans16 is a macro defined elsewhere; its third argument
; presumably requests the bit-15 strip that the plain path does with
; `and ..., 07fffh` -- confirm.
|
|
if TRANS16
|
|
Trans16 bx, esi, 1
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4, 1
|
|
Trans16 bp, esi+6
|
|
else
|
|
mov bx, [esi]
|
|
and ebx, 07fffh
|
|
mov dx, [esi+2]
|
|
mov cx, [esi+4]
|
|
and ecx, 07fffh
|
|
mov bp, [esi+6]
|
|
endif
|
|
mov esi, nf_width
|
|
|
|
jmp nf57_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf57_0:
|
|
|
|
; Paint pass: every labelled `mov ax, bx` below was patched above; do not
; change its encoding. esi now holds the row pitch.
nf57_11:mov ax, bx
|
|
shl eax, 16
|
|
nf57_12:mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf57_13:mov ax, bx
|
|
shl eax, 16
|
|
nf57_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf57_15:mov ax, bx
|
|
shl eax, 16
|
|
nf57_16:mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf57_17:mov ax, bx
|
|
shl eax, 16
|
|
nf57_18:mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf57_21:mov ax, bx
|
|
shl eax, 16
|
|
nf57_22:mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf57_23:mov ax, bx
|
|
shl eax, 16
|
|
nf57_24:mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf57_25:mov ax, bx
|
|
shl eax, 16
|
|
nf57_26:mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf57_27:mov ax, bx
|
|
shl eax, 16
|
|
nf57_28:mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf57_31:mov ax, bx
|
|
shl eax, 16
|
|
nf57_32:mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf57_33:mov ax, bx
|
|
shl eax, 16
|
|
nf57_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf57_35:mov ax, bx
|
|
shl eax, 16
|
|
nf57_36:mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf57_37:mov ax, bx
|
|
shl eax, 16
|
|
nf57_38:mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf57_41:mov ax, bx
|
|
shl eax, 16
|
|
nf57_42:mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf57_43:mov ax, bx
|
|
shl eax, 16
|
|
nf57_44:mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf57_45:mov ax, bx
|
|
shl eax, 16
|
|
nf57_46:mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf57_47:mov ax, bx
|
|
shl eax, 16
|
|
nf57_48:mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
; Step onto the last row written so edi is 7 rows below its entry value.
add edi, esi
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 16
|
|
sub edi, nfpk_back_right
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf10: ; 2x2 4x4x2 (48 bytes)
; Block handler: paints an 8-row by 16-byte region at edi as four quadrants,
; each 4 rows by 8 bytes, from 48 bytes at esi.
; Falls through only when bit 15 of the word at [esi] is clear; otherwise
; control goes to nf26.
; Source layout: four 12-byte groups at esi+0, +12, +24, +36. Each group is
; four 16-bit colour words followed by four selector bytes (at +8, +20, +32,
; +44), one per row, each an index into the dword table nfhpk_mov4.
; Quadrant order in the paint pass: upper-left, lower-left, then (after edi is
; rewound 8 rows and moved 8 bytes right) upper-right, lower-right.
; Self-modifying: the four bytes of each table entry are written over the byte
; at offset 2 of four consecutive `mov ax, bx` placeholders.
; NOTE(review): that byte is presumably the ModRM byte, so the table picks
; which of bx/dx/cx/bp is copied -- confirm against nfhpk_mov4, which is
; defined outside this section.
; Two placeholders build one dword: the first lands in the high half (shifted
; left by 16), the second in the low half, hence bh before bl in the patch pass.
; In:  esi = source data, edi = destination
; Out: esi advanced by 48; edi reduced by 8 and then by nfpk_back_right;
;      ebp preserved by push/pop; eax, ebx, ecx, edx overwritten.
|
|
|
|
test word ptr [esi], 08000h
|
|
jnz nf26
|
|
|
|
if 0 ;debug
|
|
add esi, 48
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfhpk_mov4
|
|
lea edx, byte ptr ds:nf10_11+2
|
|
|
|
; Patch pass, first group: selector bytes [esi+8..11].
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_11-nf10_11)], bh
|
|
mov [edx+(nf10_12-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_13-nf10_11)], bh
|
|
mov [edx+(nf10_14-nf10_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_15-nf10_11)], bh
|
|
mov [edx+(nf10_16-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_17-nf10_11)], bh
|
|
mov [edx+(nf10_18-nf10_11)], bl
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_21-nf10_11)], bh
|
|
mov [edx+(nf10_22-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_23-nf10_11)], bh
|
|
mov [edx+(nf10_24-nf10_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_25-nf10_11)], bh
|
|
mov [edx+(nf10_26-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_27-nf10_11)], bh
|
|
mov [edx+(nf10_28-nf10_11)], bl
|
|
|
|
|
|
; Second group: selector bytes [esi+20..23].
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_31-nf10_11)], bh
|
|
mov [edx+(nf10_32-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_33-nf10_11)], bh
|
|
mov [edx+(nf10_34-nf10_11)], bl
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_35-nf10_11)], bh
|
|
mov [edx+(nf10_36-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_37-nf10_11)], bh
|
|
mov [edx+(nf10_38-nf10_11)], bl
|
|
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_41-nf10_11)], bh
|
|
mov [edx+(nf10_42-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_43-nf10_11)], bh
|
|
mov [edx+(nf10_44-nf10_11)], bl
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_45-nf10_11)], bh
|
|
mov [edx+(nf10_46-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_47-nf10_11)], bh
|
|
mov [edx+(nf10_48-nf10_11)], bl
|
|
|
|
|
|
; Re-base edx on nf10_51 so the remaining patches are relative to that label.
lea edx, [edx+(nf10_51-nf10_11)]
|
|
|
|
; Third group: selector bytes [esi+32..35].
mov al, [esi+32]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_51-nf10_51)], bh
|
|
mov [edx+(nf10_52-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_53-nf10_51)], bh
|
|
mov [edx+(nf10_54-nf10_51)], bl
|
|
|
|
mov al, [esi+33]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_55-nf10_51)], bh
|
|
mov [edx+(nf10_56-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_57-nf10_51)], bh
|
|
mov [edx+(nf10_58-nf10_51)], bl
|
|
|
|
|
|
mov al, [esi+34]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_61-nf10_51)], bh
|
|
mov [edx+(nf10_62-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_63-nf10_51)], bh
|
|
mov [edx+(nf10_64-nf10_51)], bl
|
|
|
|
mov al, [esi+35]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_65-nf10_51)], bh
|
|
mov [edx+(nf10_66-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_67-nf10_51)], bh
|
|
mov [edx+(nf10_68-nf10_51)], bl
|
|
|
|
|
|
; Fourth group: selector bytes [esi+44..47].
mov al, [esi+44]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_71-nf10_51)], bh
|
|
mov [edx+(nf10_72-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_73-nf10_51)], bh
|
|
mov [edx+(nf10_74-nf10_51)], bl
|
|
|
|
mov al, [esi+45]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_75-nf10_51)], bh
|
|
mov [edx+(nf10_76-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_77-nf10_51)], bh
|
|
mov [edx+(nf10_78-nf10_51)], bl
|
|
|
|
|
|
mov al, [esi+46]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_81-nf10_51)], bh
|
|
mov [edx+(nf10_82-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_83-nf10_51)], bh
|
|
mov [edx+(nf10_84-nf10_51)], bl
|
|
|
|
mov al, [esi+47]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_85-nf10_51)], bh
|
|
mov [edx+(nf10_86-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_87-nf10_51)], bh
|
|
mov [edx+(nf10_88-nf10_51)], bl
|
|
|
|
; The saved esi stays at [esp] for the whole paint pass so the later
; quadrants can reload their colours from it.
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
|
|
if TRANS16
|
|
Trans16 bx, esi
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4
|
|
Trans16 bp, esi+6
|
|
else
|
|
mov bx, [esi]
|
|
mov dx, [esi+2]
|
|
mov cx, [esi+4]
|
|
mov bp, [esi+6]
|
|
endif
|
|
mov esi, nf_width
|
|
|
|
jmp nf10_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf10_0:
|
|
|
|
; Paint pass, upper-left quadrant. Every labelled `mov ax, bx` below was
; patched above; do not change its encoding.
nf10_11:mov ax, bx
|
|
shl eax, 16
|
|
nf10_12:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_13:mov ax, bx
|
|
shl eax, 16
|
|
nf10_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_15:mov ax, bx
|
|
shl eax, 16
|
|
nf10_16:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_17:mov ax, bx
|
|
shl eax, 16
|
|
nf10_18:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_21:mov ax, bx
|
|
shl eax, 16
|
|
nf10_22:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_23:mov ax, bx
|
|
shl eax, 16
|
|
nf10_24:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_25:mov ax, bx
|
|
shl eax, 16
|
|
nf10_26:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_27:mov ax, bx
|
|
shl eax, 16
|
|
nf10_28:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; Lower-left quadrant: colours from the second group (saved esi + 12).
; Load bx,dx,cx,bp with four colors
|
|
if TRANS16
|
|
mov esi, [esp]
|
|
Trans16 bx, esi+12
|
|
Trans16 dx, esi+14
|
|
Trans16 cx, esi+16
|
|
Trans16 bp, esi+18
|
|
mov esi, nf_width
|
|
else
|
|
mov eax, [esp]
|
|
mov bx, [eax+12]
|
|
mov dx, [eax+14]
|
|
mov cx, [eax+16]
|
|
mov bp, [eax+18]
|
|
endif
|
|
|
|
nf10_31:mov ax, bx
|
|
shl eax, 16
|
|
nf10_32:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_33:mov ax, bx
|
|
shl eax, 16
|
|
nf10_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_35:mov ax, bx
|
|
shl eax, 16
|
|
nf10_36:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_37:mov ax, bx
|
|
shl eax, 16
|
|
nf10_38:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_41:mov ax, bx
|
|
shl eax, 16
|
|
nf10_42:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_43:mov ax, bx
|
|
shl eax, 16
|
|
nf10_44:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_45:mov ax, bx
|
|
shl eax, 16
|
|
nf10_46:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_47:mov ax, bx
|
|
shl eax, 16
|
|
nf10_48:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; edi is now 8 rows below its entry value; go back up 8 rows and 8 bytes right.
lea eax, [esi*8-8]
|
|
sub edi, eax
|
|
|
|
; Upper-right quadrant: colours from the third group (saved esi + 24).
; Load bx,dx,cx,bp with four colors
|
|
if TRANS16
|
|
mov esi, [esp]
|
|
Trans16 bx, esi+24
|
|
Trans16 dx, esi+26
|
|
Trans16 cx, esi+28
|
|
Trans16 bp, esi+30
|
|
mov esi, nf_width
|
|
else
|
|
mov eax, [esp]
|
|
mov bx, [eax+24]
|
|
mov dx, [eax+26]
|
|
mov cx, [eax+28]
|
|
mov bp, [eax+30]
|
|
endif
|
|
|
|
nf10_51:mov ax, bx
|
|
shl eax, 16
|
|
nf10_52:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_53:mov ax, bx
|
|
shl eax, 16
|
|
nf10_54:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_55:mov ax, bx
|
|
shl eax, 16
|
|
nf10_56:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_57:mov ax, bx
|
|
shl eax, 16
|
|
nf10_58:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_61:mov ax, bx
|
|
shl eax, 16
|
|
nf10_62:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_63:mov ax, bx
|
|
shl eax, 16
|
|
nf10_64:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_65:mov ax, bx
|
|
shl eax, 16
|
|
nf10_66:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_67:mov ax, bx
|
|
shl eax, 16
|
|
nf10_68:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; Lower-right quadrant: colours from the fourth group (saved esi + 36).
; Load bx,dx,cx,bp with four colors
|
|
if TRANS16
|
|
mov esi, [esp]
|
|
Trans16 bx, esi+36
|
|
Trans16 dx, esi+38
|
|
Trans16 cx, esi+40
|
|
Trans16 bp, esi+42
|
|
mov esi, nf_width
|
|
else
|
|
mov eax, [esp]
|
|
mov bx, [eax+36]
|
|
mov dx, [eax+38]
|
|
mov cx, [eax+40]
|
|
mov bp, [eax+42]
|
|
endif
|
|
|
|
nf10_71:mov ax, bx
|
|
shl eax, 16
|
|
nf10_72:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_73:mov ax, bx
|
|
shl eax, 16
|
|
nf10_74:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_75:mov ax, bx
|
|
shl eax, 16
|
|
nf10_76:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_77:mov ax, bx
|
|
shl eax, 16
|
|
nf10_78:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_81:mov ax, bx
|
|
shl eax, 16
|
|
nf10_82:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_83:mov ax, bx
|
|
shl eax, 16
|
|
nf10_84:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_85:mov ax, bx
|
|
shl eax, 16
|
|
nf10_86:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_87:mov ax, bx
|
|
shl eax, 16
|
|
nf10_88:mov ax, bx
|
|
mov [edi+4], eax
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 48
|
|
; Undo the 8-byte right shift before applying the shared row/column adjustment.
|
|
sub edi, 8
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+16
|
|
nf26: ; 2x1 4x8x2 (32 bytes)
; Block handler: paints an 8-row by 16-byte region at edi as two halves, each
; 8 rows by 8 bytes, from 32 bytes at esi.
; Reached from nf10 when bit 15 of the word at [esi] is set; falls through
; only when bit 15 of the word at [esi+16] is clear, otherwise goes to nf42.
; Source layout: two 16-byte groups at esi+0 and esi+16. Each group is four
; 16-bit colour words followed by eight selector bytes (at +8 and +24), one
; per row, each an index into the dword table nfhpk_mov4. The first colour of
; the first group is masked with 07fffh.
; The left half is painted first; edi is then rewound 8 rows and moved 8 bytes
; right for the second half.
; Self-modifying: the four bytes of each table entry are written over the byte
; at offset 2 of four consecutive `mov ax, bx` placeholders.
; NOTE(review): that byte is presumably the ModRM byte, so the table picks
; which of bx/dx/cx/bp is copied -- confirm against nfhpk_mov4, which is
; defined outside this section.
; Two placeholders build one dword: the first lands in the high half (shifted
; left by 16), the second in the low half, hence bh before bl in the patch pass.
; In:  esi = source data, edi = destination
; Out: esi advanced by 32; edi reduced by 8 and then by nfpk_back_right;
;      ebp preserved by push/pop; eax, ebx, ecx, edx overwritten.
|
|
|
|
test word ptr [esi+16], 08000h
|
|
jnz nf42
|
|
|
|
if 0 ;debug
|
|
add esi, 32
|
|
mov ebx, 0
|
|
jmp nf_solid
|
|
endif
|
|
xor eax, eax
|
|
lea ecx, nfhpk_mov4
|
|
lea edx, byte ptr ds:nf26_11+2
|
|
|
|
; Patch pass, first group: selector bytes [esi+8..15].
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_11-nf26_11)], bh
|
|
mov [edx+(nf26_12-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_13-nf26_11)], bh
|
|
mov [edx+(nf26_14-nf26_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_15-nf26_11)], bh
|
|
mov [edx+(nf26_16-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_17-nf26_11)], bh
|
|
mov [edx+(nf26_18-nf26_11)], bl
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_21-nf26_11)], bh
|
|
mov [edx+(nf26_22-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_23-nf26_11)], bh
|
|
mov [edx+(nf26_24-nf26_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_25-nf26_11)], bh
|
|
mov [edx+(nf26_26-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_27-nf26_11)], bh
|
|
mov [edx+(nf26_28-nf26_11)], bl
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_31-nf26_11)], bh
|
|
mov [edx+(nf26_32-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_33-nf26_11)], bh
|
|
mov [edx+(nf26_34-nf26_11)], bl
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_35-nf26_11)], bh
|
|
mov [edx+(nf26_36-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_37-nf26_11)], bh
|
|
mov [edx+(nf26_38-nf26_11)], bl
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_41-nf26_11)], bh
|
|
mov [edx+(nf26_42-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_43-nf26_11)], bh
|
|
mov [edx+(nf26_44-nf26_11)], bl
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_45-nf26_11)], bh
|
|
mov [edx+(nf26_46-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_47-nf26_11)], bh
|
|
mov [edx+(nf26_48-nf26_11)], bl
|
|
|
|
|
|
; Re-base edx on nf26_51 so the remaining patches are relative to that label.
lea edx, [edx+(nf26_51-nf26_11)]
|
|
|
|
; Second group: selector bytes [esi+24..31].
mov al, [esi+24]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_51-nf26_51)], bh
|
|
mov [edx+(nf26_52-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_53-nf26_51)], bh
|
|
mov [edx+(nf26_54-nf26_51)], bl
|
|
|
|
mov al, [esi+25]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_55-nf26_51)], bh
|
|
mov [edx+(nf26_56-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_57-nf26_51)], bh
|
|
mov [edx+(nf26_58-nf26_51)], bl
|
|
|
|
|
|
mov al, [esi+26]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_61-nf26_51)], bh
|
|
mov [edx+(nf26_62-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_63-nf26_51)], bh
|
|
mov [edx+(nf26_64-nf26_51)], bl
|
|
|
|
mov al, [esi+27]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_65-nf26_51)], bh
|
|
mov [edx+(nf26_66-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_67-nf26_51)], bh
|
|
mov [edx+(nf26_68-nf26_51)], bl
|
|
|
|
|
|
mov al, [esi+28]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_71-nf26_51)], bh
|
|
mov [edx+(nf26_72-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_73-nf26_51)], bh
|
|
mov [edx+(nf26_74-nf26_51)], bl
|
|
|
|
mov al, [esi+29]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_75-nf26_51)], bh
|
|
mov [edx+(nf26_76-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_77-nf26_51)], bh
|
|
mov [edx+(nf26_78-nf26_51)], bl
|
|
|
|
|
|
mov al, [esi+30]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_81-nf26_51)], bh
|
|
mov [edx+(nf26_82-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_83-nf26_51)], bh
|
|
mov [edx+(nf26_84-nf26_51)], bl
|
|
|
|
mov al, [esi+31]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_85-nf26_51)], bh
|
|
mov [edx+(nf26_86-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_87-nf26_51)], bh
|
|
mov [edx+(nf26_88-nf26_51)], bl
|
|
|
|
; The saved esi stays at [esp] for the whole paint pass so the second half
; can reload its colours from it.
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
; NOTE(review): Trans16 is a macro defined elsewhere; its third argument
; presumably requests the bit-15 strip that the plain path does with
; `and ebx, 07fffh` -- confirm.
|
|
if TRANS16
|
|
Trans16 bx, esi, 1
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4
|
|
Trans16 bp, esi+6
|
|
else
|
|
mov bx, [esi]
|
|
and ebx, 07fffh
|
|
mov dx, [esi+2]
|
|
mov cx, [esi+4]
|
|
mov bp, [esi+6]
|
|
endif
|
|
mov esi, nf_width
|
|
|
|
jmp nf26_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf26_0:
|
|
|
|
; Paint pass, left half. Every labelled `mov ax, bx` below was patched above;
; do not change its encoding.
nf26_11:mov ax, bx
|
|
shl eax, 16
|
|
nf26_12:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_13:mov ax, bx
|
|
shl eax, 16
|
|
nf26_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_15:mov ax, bx
|
|
shl eax, 16
|
|
nf26_16:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_17:mov ax, bx
|
|
shl eax, 16
|
|
nf26_18:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_21:mov ax, bx
|
|
shl eax, 16
|
|
nf26_22:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_23:mov ax, bx
|
|
shl eax, 16
|
|
nf26_24:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_25:mov ax, bx
|
|
shl eax, 16
|
|
nf26_26:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_27:mov ax, bx
|
|
shl eax, 16
|
|
nf26_28:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_31:mov ax, bx
|
|
shl eax, 16
|
|
nf26_32:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_33:mov ax, bx
|
|
shl eax, 16
|
|
nf26_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_35:mov ax, bx
|
|
shl eax, 16
|
|
nf26_36:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_37:mov ax, bx
|
|
shl eax, 16
|
|
nf26_38:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_41:mov ax, bx
|
|
shl eax, 16
|
|
nf26_42:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_43:mov ax, bx
|
|
shl eax, 16
|
|
nf26_44:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_45:mov ax, bx
|
|
shl eax, 16
|
|
nf26_46:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_47:mov ax, bx
|
|
shl eax, 16
|
|
nf26_48:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; edi is now 8 rows below its entry value; go back up 8 rows and 8 bytes right.
lea eax, [esi*8-8]
|
|
sub edi, eax
|
|
|
|
; Right half: colours from the second group (saved esi + 16), none masked.
; Load bx,dx,cx,bp with four colors
|
|
if TRANS16
|
|
mov esi, [esp]
|
|
Trans16 bx, esi+16
|
|
Trans16 dx, esi+18
|
|
Trans16 cx, esi+20
|
|
Trans16 bp, esi+22
|
|
mov esi, nf_width
|
|
else
|
|
mov eax, [esp]
|
|
mov bx, [eax+16]
|
|
mov dx, [eax+18]
|
|
mov cx, [eax+20]
|
|
mov bp, [eax+22]
|
|
endif
|
|
|
|
nf26_51:mov ax, bx
|
|
shl eax, 16
|
|
nf26_52:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_53:mov ax, bx
|
|
shl eax, 16
|
|
nf26_54:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_55:mov ax, bx
|
|
shl eax, 16
|
|
nf26_56:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_57:mov ax, bx
|
|
shl eax, 16
|
|
nf26_58:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_61:mov ax, bx
|
|
shl eax, 16
|
|
nf26_62:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_63:mov ax, bx
|
|
shl eax, 16
|
|
nf26_64:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_65:mov ax, bx
|
|
shl eax, 16
|
|
nf26_66:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_67:mov ax, bx
|
|
shl eax, 16
|
|
nf26_68:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_71:mov ax, bx
|
|
shl eax, 16
|
|
nf26_72:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_73:mov ax, bx
|
|
shl eax, 16
|
|
nf26_74:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_75:mov ax, bx
|
|
shl eax, 16
|
|
nf26_76:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_77:mov ax, bx
|
|
shl eax, 16
|
|
nf26_78:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_81:mov ax, bx
|
|
shl eax, 16
|
|
nf26_82:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_83:mov ax, bx
|
|
shl eax, 16
|
|
nf26_84:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_85:mov ax, bx
|
|
shl eax, 16
|
|
nf26_86:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_87:mov ax, bx
|
|
shl eax, 16
|
|
nf26_88:mov ax, bx
|
|
mov [edi+4], eax
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 32
|
|
; Undo the 8-byte right shift before applying the shared row/column adjustment.
|
|
sub edi, 8
|
|
sub edi, nfpk_back_right ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+32
|
|
nf42:                                   ; 1x2 8x4x2 (32 bytes)
;
; Source layout at [esi], as consumed below:
;    +0 ..  +7   four 16-bit colors used for rows 0-3
;    +8 .. +15   eight selector bytes for rows 0-3 (two per row)
;   +16 .. +23   four 16-bit colors used for rows 4-7
;   +24 .. +31   eight selector bytes for rows 4-7 (two per row)
;
; Each selector byte indexes the dword table nfhpk_mov4.  The four bytes of
; the fetched entry are stored into byte 2 of four consecutive 'mov ax, bx'
; instructions of the unrolled writer further down (self-modifying code;
; presumably this retargets the source register among bx/dx/cx/bp - confirm
; against the nfhpk_mov4 table).
;
; On exit esi has advanced by 32 and edi has been stepped down seven rows
; and then reduced by nfpk_back_right.  ebp is saved and restored here;
; eax, ebx, ecx and edx are overwritten.

if 0 ;debug
        add     esi, 32
        mov     ebx, 0
        jmp     nf_solid
endif

; Fetch the selector byte at [esi+srcoff], look it up in the table at ecx
; and patch the four instructions labelled p1..p4.  edx must point at
; byte 2 of the instruction labelled 'anchor'; eax bits 8-31 must be zero.
NF42_PATCH MACRO srcoff, anchor, p1, p2, p3, p4
        mov     al, [esi+srcoff]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(p1-anchor)], bh
        mov     [edx+(p2-anchor)], bl
        shr     ebx, 16
        mov     [edx+(p3-anchor)], bh
        mov     [edx+(p4-anchor)], bl
ENDM

; Emit one 8-pixel row writer.  p1..p8 become the labels of the eight
; patchable 'mov ax, bx' instructions; pixels are packed two per dword and
; stored at [edi], [edi+4], [edi+8], [edi+12].
NF42_ROW MACRO p1, p2, p3, p4, p5, p6, p7, p8
p1:     mov     ax, bx
        shl     eax, 16
p2:     mov     ax, bx
        mov     [edi], eax
p3:     mov     ax, bx
        shl     eax, 16
p4:     mov     ax, bx
        mov     [edi+4], eax
p5:     mov     ax, bx
        shl     eax, 16
p6:     mov     ax, bx
        mov     [edi+8], eax
p7:     mov     ax, bx
        shl     eax, 16
p8:     mov     ax, bx
        mov     [edi+12], eax
ENDM

        xor     eax, eax
        lea     ecx, nfhpk_mov4
        lea     edx, byte ptr ds:nf42_11+2

        ; --- patch the writers for rows 0-3 ---
        NF42_PATCH  8, nf42_11, nf42_11, nf42_12, nf42_13, nf42_14
        NF42_PATCH  9, nf42_11, nf42_15, nf42_16, nf42_17, nf42_18

        NF42_PATCH 10, nf42_11, nf42_21, nf42_22, nf42_23, nf42_24
        NF42_PATCH 11, nf42_11, nf42_25, nf42_26, nf42_27, nf42_28

        NF42_PATCH 12, nf42_11, nf42_31, nf42_32, nf42_33, nf42_34
        NF42_PATCH 13, nf42_11, nf42_35, nf42_36, nf42_37, nf42_38

        NF42_PATCH 14, nf42_11, nf42_41, nf42_42, nf42_43, nf42_44
        NF42_PATCH 15, nf42_11, nf42_45, nf42_46, nf42_47, nf42_48

        ; --- re-anchor edx on nf42_51 and patch the writers for rows 4-7 ---
        lea     edx, [edx+(nf42_51-nf42_11)]

        NF42_PATCH 24, nf42_51, nf42_51, nf42_52, nf42_53, nf42_54
        NF42_PATCH 25, nf42_51, nf42_55, nf42_56, nf42_57, nf42_58

        NF42_PATCH 26, nf42_51, nf42_61, nf42_62, nf42_63, nf42_64
        NF42_PATCH 27, nf42_51, nf42_65, nf42_66, nf42_67, nf42_68

        NF42_PATCH 28, nf42_51, nf42_71, nf42_72, nf42_73, nf42_74
        NF42_PATCH 29, nf42_51, nf42_75, nf42_76, nf42_77, nf42_78

        NF42_PATCH 30, nf42_51, nf42_81, nf42_82, nf42_83, nf42_84
        NF42_PATCH 31, nf42_51, nf42_85, nf42_86, nf42_87, nf42_88

        push    ebp
        push    esi                     ; [esp] = source pointer, reread below

        ; Load bx,dx,cx,bp with the four colors for rows 0-3
if TRANS16
        Trans16 bx, esi, 1
        Trans16 dx, esi+2
        Trans16 cx, esi+4
        Trans16 bp, esi+6
else
        mov     bx, [esi]
        and     ebx, 07fffh
        mov     dx, [esi+2]
        mov     cx, [esi+4]
        mov     bp, [esi+6]
endif
        mov     esi, nf_width           ; esi = row stride while drawing

        jmp     nf42_0                  ; flush prefetch

        ALIGN 4
nf42_0:

        NF42_ROW nf42_11, nf42_12, nf42_13, nf42_14, nf42_15, nf42_16, nf42_17, nf42_18
        add     edi, esi

        NF42_ROW nf42_21, nf42_22, nf42_23, nf42_24, nf42_25, nf42_26, nf42_27, nf42_28
        add     edi, esi

        NF42_ROW nf42_31, nf42_32, nf42_33, nf42_34, nf42_35, nf42_36, nf42_37, nf42_38
        add     edi, esi

        NF42_ROW nf42_41, nf42_42, nf42_43, nf42_44, nf42_45, nf42_46, nf42_47, nf42_48
        add     edi, esi

        ; Load bx,dx,cx,bp with the four colors for rows 4-7
if TRANS16
        mov     esi, [esp]              ; recover the saved source pointer
        Trans16 bx, esi+16, 1
        Trans16 dx, esi+18
        Trans16 cx, esi+20
        Trans16 bp, esi+22
        mov     esi, nf_width
else
        mov     eax, [esp]
        mov     bx, [eax+16]
        and     ebx, 07fffh
        mov     dx, [eax+18]
        mov     cx, [eax+20]
        mov     bp, [eax+22]
endif

        NF42_ROW nf42_51, nf42_52, nf42_53, nf42_54, nf42_55, nf42_56, nf42_57, nf42_58
        add     edi, esi

        NF42_ROW nf42_61, nf42_62, nf42_63, nf42_64, nf42_65, nf42_66, nf42_67, nf42_68
        add     edi, esi

        NF42_ROW nf42_71, nf42_72, nf42_73, nf42_74, nf42_75, nf42_76, nf42_77, nf42_78
        add     edi, esi

        NF42_ROW nf42_81, nf42_82, nf42_83, nf42_84, nf42_85, nf42_86, nf42_87, nf42_88

        pop     esi
        pop     ebp
        add     esi, 32                 ; consume the 32 source bytes
        sub     edi, nfpk_back_right
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf11:                                   ; 8x8x16 (128 bytes)
;
; Raw block: 8 rows of 8 16-bit pixels are taken from [esi] (16 bytes per
; row, 128 in total) and written to [edi], stepping edi by nf_width between
; rows.  With TRANS16 each pixel passes through the Trans16 macro; otherwise
; rows are copied as four dwords.
; On exit esi has advanced by 128 and edi, after seven row steps, has been
; reduced by nfpk_back_right.
if 0 ;debug
        add     esi, 128
        mov     ebx, 0
        jmp     nf_solid
endif
        mov     edx, nf_width           ; edx = row stride

if TRANS16

; Translate the eight pixels at idx .. idx+14 and store them at [edi].
Trans16Blk MACRO idx
        Trans16 bx, idx
        mov     [edi], bx
        Trans16 bx, idx+2
        mov     [edi+2], bx
        Trans16 bx, idx+4
        mov     [edi+4], bx
        Trans16 bx, idx+6
        mov     [edi+6], bx
        Trans16 bx, idx+8
        mov     [edi+8], bx
        Trans16 bx, idx+10
        mov     [edi+10], bx
        Trans16 bx, idx+12
        mov     [edi+12], bx
        Trans16 bx, idx+14
        mov     [edi+14], bx
ENDM

        Trans16Blk esi                  ; row 0
        FOR     rowoff, <16,32,48,64,80,96,112>         ; rows 1-7
        add     edi, edx
        Trans16Blk esi+rowoff
        ENDM

else

; Copy one 16-byte row from [src] to [edi] as four dwords.
NF11_ROW MACRO src
        mov     eax, [src]
        mov     [edi], eax
        mov     eax, [src+4]
        mov     [edi+4], eax
        mov     eax, [src+8]
        mov     [edi+8], eax
        mov     eax, [src+12]
        mov     [edi+12], eax
ENDM

        NF11_ROW esi                    ; row 0
        FOR     rowoff, <16,32,48,64,80,96,112>         ; rows 1-7
        add     edi, edx
        NF11_ROW esi+rowoff
        ENDM

endif

        add     esi, 128
        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf12:                                   ; low 4x4x16 (32 bytes)
;
; Half-resolution block: 16 source pixels (4 rows of 4, 2 bytes each) are
; read from [esi].  Every pixel is translated with Trans16, doubled
; horizontally into one dword and stored on two consecutive destination
; rows, so each source pixel covers a 2x2 area.
; On exit esi has advanced by 32 and edi, after seven row steps, has been
; reduced by nfpk_back_right.
if 0 ;debug
        add     esi, 32
        mov     ebx, 0
        jmp     nf_solid
endif
        mov     edx, nf_width           ; edx = row stride

; Translate the pixel at 'src', replicate it into both halves of eax and
; store that dword at byte offset 'dstoff' of the current row and the next.
NF12_PIXEL MACRO src, dstoff
        Trans16 bx, src
        shrd    eax, ebx, 16            ; high half of eax = pixel
        mov     ax, bx                  ; low half of eax  = pixel
        mov     [edi+dstoff], eax
        mov     [edi+edx+dstoff], eax
ENDM

        ; source row 0 -> destination rows 0,1
        NF12_PIXEL esi, 0
        NF12_PIXEL esi+2, 4
        NF12_PIXEL esi+4, 8
        NF12_PIXEL esi+6, 12
        lea     edi, [edi+edx*2]

        ; source row 1 -> destination rows 2,3
        NF12_PIXEL esi+8, 0
        NF12_PIXEL esi+10, 4
        NF12_PIXEL esi+12, 8
        NF12_PIXEL esi+14, 12
        lea     edi, [edi+edx*2]

        ; source row 2 -> destination rows 4,5
        NF12_PIXEL esi+16, 0
        NF12_PIXEL esi+18, 4
        NF12_PIXEL esi+20, 8
        NF12_PIXEL esi+22, 12
        lea     edi, [edi+edx*2]

        ; source row 3 -> destination rows 6,7
        NF12_PIXEL esi+24, 0
        NF12_PIXEL esi+26, 4
        NF12_PIXEL esi+28, 8
        NF12_PIXEL esi+30, 12
        add     edi, edx                ; leave edi on the last row

        sub     edi, nfpk_back_right
        add     esi, 32
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf13:                                   ; 2x2 4x4x0 (8 bytes)
;
; Four solid quadrants: four 16-bit colors are read from [esi].  The first
; two fill the left and right halves of rows 0-3, the last two fill the left
; and right halves of rows 4-7 (8 bytes = 4 pixels per half row).
; On exit esi has advanced by 8 and edi, after seven row steps, has been
; reduced by nfpk_back_right.
if 0 ;debug
        add     esi, 8
        mov     ebx, 0
        jmp     nf_solid
endif
        mov     edx, nf_width           ; edx = row stride

; Load two colors and duplicate each into both halves of a dword:
;   ebx = color at srcA (left half),  ecx = color at srcB (right half).
NF13_LOAD MACRO srcA, srcB
if TRANS16
        Trans16 cx, srcA
        shrd    ebx, ecx, 16
        mov     bx, cx

        Trans16 cx, srcB
        shrd    eax, ecx, 16
        mov     ax, cx
        mov     ecx, eax
else
        mov     ax, [srcA]
        shrd    ebx, eax, 16
        mov     bx, ax

        mov     ax, [srcB]
        shrd    ecx, eax, 16
        mov     cx, ax
endif
ENDM

; Write the current row and the one below it: ebx to the left 8 bytes,
; ecx to the right 8 bytes.
NF13_ROWPAIR MACRO
        mov     [edi], ebx
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], ecx
        mov     [edi+edx], ebx
        mov     [edi+edx+4], ebx
        mov     [edi+edx+8], ecx
        mov     [edi+edx+12], ecx
ENDM

        ; rows 0-3
        NF13_LOAD esi, esi+2
        NF13_ROWPAIR
        lea     edi, [edi+edx*2]
        NF13_ROWPAIR
        lea     edi, [edi+edx*2]

        ; rows 4-7
        NF13_LOAD esi+4, esi+6
        NF13_ROWPAIR
        lea     edi, [edi+edx*2]
        NF13_ROWPAIR
        add     edi, edx                ; leave edi on the last row

        sub     edi, nfpk_back_right
        add     esi, 8

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
nf14:                                   ; 8x8x0 (2 bytes)
;
; Solid block: one 16-bit color is read from [esi] (via Trans16), esi is
; advanced by 2, and the color is duplicated into both halves of ebx before
; falling through to nf_solid.
        Trans16 cx, esi
        add     esi, 2
        shrd    ebx, ecx, 16
        mov     bx, cx

; nf_solid is also a jump target of the "if 0 ;debug" stubs in the other
; handlers.  In: ebx = dword to replicate, edi = top-left of the block.
; Fills 8 rows of 16 bytes with ebx, then reduces edi by nfpk_back_right.
nf_solid:
        mov     edx, nf_width           ; edx = row stride

        REPEAT  (SHEIGHT-1)             ; first seven rows, stepping down
        mov     [edi], ebx
        mov     [edi+4], ebx
        mov     [edi+8], ebx
        mov     [edi+12], ebx
        add     edi, edx
        ENDM
        mov     [edi], ebx              ; last row, no step afterwards
        mov     [edi+4], ebx
        mov     [edi+8], ebx
        mov     [edi+12], ebx

        sub     edi, nfpk_back_right    ; (SHEIGHT-1)*width+8

        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; Handler slot marked "unused" by the original author: a bare near return
; that reads no data and modifies no registers or memory.
nf15: ; unused
|
|
retn
|
|
|
|
nfHPkDecomp ENDP
|
|
|
|
endif ; PKDATA
|
|
|
|
;---------------------------------------------------------------------
|
|
; ShowFrame
|
|
;------------
|
|
|
|
EXTERN sf_LineWidth: DWORD ;unsigned sf_LineWidth; // Distance between lines in memory
|
|
|
|
; Banked screen parameters
|
|
EXTERN sf_SetBank: PTRPROC ;unsigned long sf_SetBank;
|
|
EXTERN sf_WinGran: DWORD ;unsigned sf_WinGran;
|
|
EXTERN sf_WinSize: DWORD ;unsigned long sf_WinSize;
|
|
EXTERN sf_WinGranPerSize: DWORD ;unsigned sf_WinGranPerSize;
|
|
;{sf_WriteWinPtr and sf_WriteWinLimit replace sf_WriteWinSeg, see mveliba.asm}
|
|
EXTERN sf_WriteWinPtr: PTRBYTE ;unsigned char *sf_WriteWinPtr;
|
|
EXTERN sf_WriteWinLimit: PTRBYTE ;unsigned char *sf_WriteWinLimit;
|
|
EXTERN sf_WriteWin: DWORD ;unsigned sf_WriteWin;
|
|
|
|
if SCALING
|
|
EXTERN opt_hscale_step: DWORD
|
|
EXTERN opt_hscale_adj: DWORD
|
|
endif
|
|
|
|
;void mve_ShowFrameField(
|
|
; unsigned char *buf, unsigned bufw, unsigned bufh,
|
|
; unsigned sx, unsigned sy, unsigned w, unsigned h,
|
|
; unsigned dstx, unsigned dsty, unsigned field)
|
|
|
|
;
; mve_ShowFrameField
;   Copies a rectangle of h lines from buf to the display window at
;   sf_WriteWinPtr, w bytes per destination line (moved as w>>2 dwords).
;
;   buf, bufw   source buffer and its line pitch in bytes
;   bufh        not referenced by this routine
;   sx, sy      top-left of the rectangle inside buf; also added to
;               dstx/dsty (see the <WIP> hack note below)
;   w, h        destination bytes per line, number of lines
;   dstx, dsty  destination origin
;   field       0: destination line step is sf_LineWidth
;               nonzero: line step is 2*sf_LineWidth; when bit 0 is set the
;               start is moved down by one sf_LineWidth
;
;   When sf_SetBank is 0 the destination is treated as one linear window.
;   Otherwise the copy is split at sf_WriteWinLimit and sf_SetBank is called
;   (bh=0, bl=sf_WriteWin, edx=bank) whenever the window has to move.
;
;   With SCALING and opt_hscale_step==3 the source advances 3 bytes for
;   every 4 destination bytes; that path expects w%16==12.
;
;   Fixes over the previous version:
;     * the linear path now returns immediately when h==0 (the dec/jnz line
;       loop used to wrap around and run 2^32 times);
;     * the scale-3 line loops skip the 16-byte group loop when w<16
;       instead of wrapping the group counter.
;
mve_ShowFrameField PROC USES ESI EDI EBX, \
                buf:PTRBYTE, bufw:DWORD, bufh:DWORD, \
                sx:DWORD, sy:DWORD, w:DWORD, h:DWORD, \
                dstx:DWORD, dsty:DWORD, field:DWORD
        LOCAL   bank:DWORD
        LOCAL   w4:DWORD
        LOCAL   new_src_line:DWORD
        LOCAL   linestep:DWORD
        LOCAL   new_dst_line:DWORD

        mov     ax, ds                  ; Ensure es==ds for symantec flat mode
        mov     es, ax

        mov     eax, w                  ; w4 = w>>2
        shr     eax, 2
        mov     w4, eax

;;; <WIP>
;;; In stretched width mode, we either keep 4/5 (a) of the source pixels,
;;; or duplicate every fourth pixel to magnify by 5/4 (b).
;;; In these cases, new_src_line is either bufw-w*5/4 (a) or bufw-w*4/5 (b).
;;; Let ScaleStep be 5 (a) or 3 (b) instead of 4.  This is the amount to advance
;;; the source after copying 32-bits from source to destination.
;;; The coordinate system used for the source will be a simulated scaled system.
;;; Rather than scale height, I plan to use alternate vertical resolutions.  However,
;;; it might be a good idea to also provide for scaled height in case we want a
;;; higher resolution border.
;;; Question: Do we still need to support transferring subrectangles?

if SCALING
        .if opt_hscale_step==4
endif
        mov     eax, bufw               ; new_src_line = bufw - w
        sub     eax, w
        mov     new_src_line, eax
if SCALING
        .else
        mov     eax, opt_hscale_adj     ; new_src_line = opt_hscale_adj
        mov     new_src_line, eax
        .endif
endif

        mov     eax, sf_LineWidth       ; linestep = sf_LineWidth;
        .if field                       ; if (field)
        add     eax, eax                ;   linestep <<= 1;
        .endif
        mov     linestep, eax

        sub     eax, w                  ; new_dst_line = linestep - w;
        mov     new_dst_line, eax

        mov     eax, sy                 ; buf += sy*bufw + sx
        mul     bufw
        add     eax, sx
        add     buf, eax

        mov     eax, sx                 ; dstx += sx
        add     dstx, eax

        ; <WIP> This is a hack.  We should pass in src x,y of origin
        ; or make dstx/dsty absolute.
        ;
        mov     eax, bufw               ; if (field && sx >= (bufw>>1))
        shr     eax, 1
        .if field && sx >= eax
        sub     dstx, eax               ;   dstx -= bufw>>1
        .endif

        mov     eax, sy                 ; dsty += sy
        add     dsty, eax

        .if sf_SetBank==0 ;------------------ linear window

        ; dst = WriteWinPtr + (dsty*linestep+dstx)
        mov     edi, sf_WriteWinPtr
        mov     eax, dsty
        mul     linestep
        add     eax, dstx
        add     edi, eax

        .if field & 1
        add     edi, sf_LineWidth
        .endif

        mov     eax, new_src_line
        mov     edx, new_dst_line
        mov     esi, buf
        mov     ebx, h
        test    ebx, ebx                ; h==0: nothing to copy (the line
        jz      sf_done                 ;  loops below count with dec/jnz)
if SCALING
        .if opt_hscale_step==3
        sub     edi, 8
sf_lp2a:
        mov     ecx, w4
        shr     ecx, 2                  ; ecx = number of 16-byte groups
        jz      sf_lp2c                 ; none: only the 12-byte tail
        ALIGN 4
sf_lp2b:
        mov     eax, [esi]
        mov     [edi+8], eax
        mov     eax, [esi+3]
        mov     [edi+12], eax
        add     edi, 16
        mov     eax, [esi+6]
        mov     [edi], eax
        mov     eax, [esi+9]
        mov     [edi+4], eax
        add     esi, 12
        dec     ecx
        jnz     sf_lp2b
sf_lp2c:
        ; To avoid the problem of the last pixel coming from the next line
        ; we arrange for w%16==12, so here is where we copy the
        ; last 12 pixels.
        mov     eax, [esi]
        mov     [edi+8], eax
        mov     eax, [esi+3]
        mov     [edi+12], eax
        add     edi, 12
        mov     eax, [esi+6]
        mov     [edi+4], eax
        add     esi, 9
        add     esi, new_src_line       ; eax was used as scratch above
        add     edi, edx
        dec     ebx
        jnz     sf_lp2a
        add     edi, 8
        .else
endif
sf_lp:  mov     ecx, w4                 ; width/4
        rep movsd
        add     esi, eax
        add     edi, edx
        dec     ebx
        jnz     sf_lp
if SCALING
        .endif
endif

        .else ; sf_SetBank ;------------------ banked window

        mov     esi, buf

        ; start = dsty * linestep + dstx
        mov     eax, linestep
        mul     dsty
        .if field & 1
        add     eax, sf_LineWidth
        .endif
        add     eax, dstx
        ; bank = start / WinGran
        ; dst = (start % WinGran) + sf_WriteWinPtr
        mov     edx, 0
        div     sf_WinGran
        mov     bank, eax
        mov     edi, edx
        add     edi, sf_WriteWinPtr

        ; Select new bank
        mov     bh, 0
        mov     bl, byte ptr sf_WriteWin
        mov     edx, bank
        call    sf_SetBank
        ; eax/edx destroyed by sf_SetBank

sf_0:   ; rem = sf_WriteWinLimit - dst
        mov     eax, sf_WriteWinLimit
        sub     eax, edi
        ; h2 = (rem+(linestep-w))/linestep
        add     eax, linestep
        sub     eax, w
        mov     edx, 0
        div     linestep
        ; if (h<h2) h2=h
        cmp     h, eax
        jae     skplim
        mov     eax, h
skplim: ; if (h2==0) // No full lines can be transferred.
        or      eax, eax
        jz      sf_2
        ; h -= h2
        ; Transfer h2 lines to screen
        sub     h, eax
        mov     ebx, new_src_line
        mov     edx, new_dst_line
if SCALING
        .if opt_hscale_step==3
        sub     edi, 8
sf_1a3: mov     ecx, w4
        shr     ecx, 2                  ; ecx = number of 16-byte groups
        jz      sf_1c3                  ; none: only the 12-byte tail
        ALIGN 4
sf_1b3: mov     ebx, [esi]
        mov     [edi+8], ebx
        mov     ebx, [esi+3]
        mov     [edi+12], ebx
        add     edi, 16
        mov     ebx, [esi+6]
        mov     [edi], ebx
        mov     ebx, [esi+9]
        mov     [edi+4], ebx
        add     esi, 12
        dec     ecx
        jnz     sf_1b3
sf_1c3:
        ; To avoid the problem of the last pixel coming from the next line
        ; we arrange for w%16==12, so here is where we copy the
        ; last 12 pixels.
        mov     ebx, [esi]
        mov     [edi+8], ebx
        mov     ebx, [esi+3]
        mov     [edi+12], ebx
        add     edi, 12
        mov     ebx, [esi+6]
        mov     [edi+4], ebx
        add     esi, 9
        add     esi, new_src_line       ; ebx was used as scratch above
        add     edi, edx
        dec     eax
        jnz     sf_1a3
        add     edi, 8
        .else
endif
sf_1:   mov     ecx, w4                 ; width/4
        rep movsd
        add     esi, ebx
        add     edi, edx
        dec     eax
        jnz     sf_1
if SCALING
        .endif
endif

sf_2:   ; if (h!=0) // There are still lines to be transferred
        ;           // transfer partial line
        ; (eax is 0 on every path that reaches this point)
        or      eax, h
        jz      sf_9

        ; w4a = max(0, (sf_WriteWinLimit-dst)/4)
        mov     ecx, sf_WriteWinLimit
        sub     ecx, edi
        sar     ecx, 2
        jns     sf_2b
        mov     ecx, 0
sf_2b:  push    ecx                     ; Save size of first half
if SCALING
        .if opt_hscale_step==3

        ALIGN 4
        or      ecx, ecx
        jz      sf_2c3
sf_2b3: mov     eax, [esi]
        mov     [edi], eax
        add     esi, 3
        add     edi, 4
        dec     ecx
        jnz     sf_2b3
sf_2c3:

        .else
endif
        rep movsd
if SCALING
        .endif
endif

        ; bank += WinSize/WinGran       // Assumes WinSize%WinGran==0
        ; off  -= (WinSize/WinGran)*WinGran == WinSize

        mov     eax, sf_WinGranPerSize
        add     bank, eax
        sub     edi, sf_WinSize

        ; Select new bank
        mov     bh, 0
        mov     bl, byte ptr sf_WriteWin
        mov     edx, bank
        call    sf_SetBank
        ; eax/edx destroyed by sf_SetBank

        ; w4b = w4-w4a                  // Size of second half
        pop     eax                     ; Size of first half
        mov     ecx, w4
        sub     ecx, eax                ; Size of 2nd half
if SCALING
        .if opt_hscale_step==3
        ALIGN 4
        or      ecx, ecx
        jz      sf_8b3
sf_8a3: mov     eax, [esi]
        mov     [edi], eax
        add     esi, 3
        add     edi, 4
        dec     ecx
        jnz     sf_8a3
sf_8b3:
        .else
endif
        rep movsd
if SCALING
        .endif
endif
        add     esi, new_src_line
        add     edi, new_dst_line
        ;; --h                          // Count split line
        ;; if (h!=0) continue
        dec     h
        jnz     sf_0
sf_9:

        .endif ; sf_SetBank==0 ;------------------

sf_done:
        ret

mve_ShowFrameField ENDP
|
|
|
|
; Special version of mve_ShowFrameField which converts from
|
|
; 256 colors to 15-bit color. Does not support horizontal
|
|
; magnification.
|
|
|
|
;void mve_ShowFrameFieldHi(
|
|
; unsigned char *buf, unsigned bufw, unsigned bufh,
|
|
; unsigned sx, unsigned sy, unsigned w, unsigned h,
|
|
; unsigned dstx, unsigned dsty, unsigned field)
|
|
|
|
;
; SF_PAL15_PAIRS
;   Converts ecx pairs of 8-bit source pixels at [esi] into ecx dwords at
;   [edi], looking each pixel up in the word table pal15_tbl.  The first
;   pixel of a pair lands in the low word of the dword, the second in the
;   high word.  Does nothing when ecx is 0.
;   In:   ecx = pair count, esi = source, edi = destination
;   Out:  esi advanced by 2*count, edi advanced by 4*count, ecx = 0
;   Uses: eax, ebx, edx, flags
;
SF_PAL15_PAIRS MACRO
        LOCAL   pair_next, pair_done
        test    ecx, ecx                ; a zero count must not enter the
        jz      pair_done               ;  dec/jnz loop (it would wrap)
        lea     ebx, pal15_tbl
        xor     eax, eax                ; keep bits 8-31 clear for indexing
pair_next:
        mov     al, [esi]
        add     esi, 2
        mov     dx, [ebx+eax*2]         ; first pixel
        mov     al, [esi-1]
        shl     edx, 16
        mov     dx, [ebx+eax*2]         ; second pixel
        rol     edx, 16                 ; first -> low word, second -> high
        mov     [edi], edx
        add     edi, 4
        dec     ecx
        jnz     pair_next
pair_done:
ENDM

;
; mve_ShowFrameFieldHi
;   Same job as mve_ShowFrameField, but every 8-bit source pixel is expanded
;   to a 16-bit entry of pal15_tbl, so one line consumes w source bytes and
;   produces 2*w destination bytes (written as w>>1 dwords).
;
;   buf, bufw   source buffer and its line pitch in bytes
;   bufh        not referenced by this routine
;   sx, sy      top-left of the rectangle inside buf; also added to
;               dstx/dsty (see the <WIP> hack note below)
;   w, h        source pixels per line, number of lines
;   dstx, dsty  destination origin in pixels (dstx is doubled for bytes)
;   field       0: destination line step is sf_LineWidth
;               nonzero: line step is 2*sf_LineWidth; when bit 0 is set the
;               start is moved down by one sf_LineWidth
;
;   When sf_SetBank is 0 the destination is one linear window; otherwise the
;   copy is split at sf_WriteWinLimit and sf_SetBank is called
;   (bh=0, bl=sf_WriteWin, edx=bank) whenever the window has to move.
;
;   Fixes over the previous version:
;     * every conversion loop is guarded against a zero count (w<2 used to
;       wrap the dec/jnz counter in the full-line loops);
;     * the linear path returns immediately when h==0 instead of wrapping
;       its line counter.
;
mve_ShowFrameFieldHi PROC USES ESI EDI EBX, \
                buf:PTRBYTE, bufw:DWORD, bufh:DWORD, \
                sx:DWORD, sy:DWORD, w:DWORD, h:DWORD, \
                dstx:DWORD, dsty:DWORD, field:DWORD
        LOCAL   bank:DWORD
        LOCAL   w4:DWORD
        LOCAL   new_src_line:DWORD
        LOCAL   linestep:DWORD
        LOCAL   new_dst_line:DWORD

        mov     ax, ds                  ; Ensure es==ds for symantec flat mode
        mov     es, ax

        mov     eax, w                  ; w4 = w>>1
        shr     eax, 1
        mov     w4, eax                 ; Number of dst dwords to transfer

        mov     eax, bufw               ; new_src_line = bufw - w
        sub     eax, w
        mov     new_src_line, eax

        mov     eax, sf_LineWidth       ; linestep = sf_LineWidth;
        .if field                       ; if (field)
        add     eax, eax                ;   linestep <<= 1;
        .endif
        mov     linestep, eax

        sub     eax, w                  ; new_dst_line = linestep - w*2;
        sub     eax, w
        mov     new_dst_line, eax

        mov     eax, sy                 ; buf += sy*bufw + sx
        mul     bufw
        add     eax, sx
        add     buf, eax

        mov     eax, sx                 ; dstx += sx
        add     dstx, eax

        ; <WIP> This is a hack.  We should pass in src x,y of origin
        ; or make dstx/dsty absolute.
        ;
        mov     eax, bufw               ; if (field && sx >= (bufw>>1))
        shr     eax, 1
        .if field && sx >= eax
        sub     dstx, eax               ;   dstx -= bufw>>1
        .endif

        mov     eax, sy                 ; dsty += sy
        add     dsty, eax

        .if sf_SetBank==0 ;------------------ linear window

        ; dst = WriteWinPtr + (dsty*linestep+dstx*2)
        mov     edi, sf_WriteWinPtr
        mov     eax, dsty
        mul     linestep
        add     eax, dstx
        add     eax, dstx
        add     edi, eax

        .if field & 1
        add     edi, sf_LineWidth
        .endif

        mov     esi, buf
        mov     ebx, h                  ; ebx = lines remaining
        test    ebx, ebx                ; h==0: nothing to draw
        jz      sf_done
sf_lp:  mov     ecx, w4                 ; w/2 dwords per line
        push    ebx                     ; macro uses ebx for the table
        SF_PAL15_PAIRS
        pop     ebx

        add     esi, new_src_line
        add     edi, new_dst_line
        dec     ebx
        jnz     sf_lp

        .else ; sf_SetBank ;------------------ banked window

        mov     esi, buf

        ; start = dsty * linestep + dstx*2
        mov     eax, linestep
        mul     dsty
        .if field & 1
        add     eax, sf_LineWidth
        .endif
        add     eax, dstx
        add     eax, dstx
        ; bank = start / WinGran
        ; dst = (start % WinGran) + sf_WriteWinPtr
        mov     edx, 0
        div     sf_WinGran
        mov     bank, eax
        mov     edi, edx
        add     edi, sf_WriteWinPtr

        ; Select new bank
        mov     bh, 0
        mov     bl, byte ptr sf_WriteWin
        mov     edx, bank
        call    sf_SetBank
        ; eax/edx destroyed by sf_SetBank

sf_0:   ; rem = sf_WriteWinLimit - dst
        mov     eax, sf_WriteWinLimit
        sub     eax, edi
        ; h2 = (rem+(linestep-w*2))/linestep
        add     eax, new_dst_line
        mov     edx, 0
        div     linestep
        ; if (h<h2) h2=h
        cmp     h, eax
        jae     skplim
        mov     eax, h
skplim: ; if (h2==0) // No full lines can be transferred.
        or      eax, eax
        jz      sf_2
        ; h -= h2
        ; Transfer h2 lines to screen
        sub     h, eax
sf_1:   mov     ecx, w4                 ; w/2 dwords per line
        push    eax                     ; eax = lines left in this bank
        SF_PAL15_PAIRS
        pop     eax

        add     esi, new_src_line
        add     edi, new_dst_line
        dec     eax
        jnz     sf_1

sf_2:   ; if (h!=0) // There are still lines to be transferred
        ;           // transfer partial line
        ; (eax is 0 on every path that reaches this point)
        or      eax, h
        jz      sf_9

        ; w4a = max(0, (sf_WriteWinLimit-dst)/4)
        mov     ecx, sf_WriteWinLimit
        sub     ecx, edi
        sar     ecx, 2
        jns     sf_2b
        mov     ecx, 0
sf_2b:  push    ecx                     ; Save size of first half
        SF_PAL15_PAIRS                  ; part that fits in the current bank

        ; bank += WinSize/WinGran       // Assumes WinSize%WinGran==0
        ; off  -= (WinSize/WinGran)*WinGran == WinSize

        mov     eax, sf_WinGranPerSize
        add     bank, eax
        sub     edi, sf_WinSize

        ; Select new bank
        mov     bh, 0
        mov     bl, byte ptr sf_WriteWin
        mov     edx, bank
        call    sf_SetBank
        ; eax/edx destroyed by sf_SetBank

        ; w4b = w4-w4a                  // Size of second half
        pop     eax                     ; Size of first half
        mov     ecx, w4
        sub     ecx, eax                ; Size of 2nd half
        SF_PAL15_PAIRS                  ; remainder, in the new bank

        add     esi, new_src_line
        add     edi, new_dst_line
        ;; --h                          // Count split line
        ;; if (h!=0) continue
        dec     h
        jnz     sf_0
sf_9:

        .endif ; sf_SetBank==0 ;------------------

sf_done:
        ret

mve_ShowFrameFieldHi ENDP
|
|
|
|
if PARTIAL
|
|
|
|
SHOW_FRAME_CHG_BODY MACRO HI_COLOR_FLAG:REQ
|
|
|
|
LOCAL HI_COLOR_SCALE
|
|
HI_COLOR_SCALE equ HI_COLOR_FLAG+1
|
|
|
|
mov ax, ds ; Insure es==ds for symantec flat mode
|
|
mov es, ax
|
|
|
|
mov eax, w ; _width = w*SWIDTH*HI_COLOR_SCALE;
|
|
shl eax, LOG2_SWIDTH+HI_COLOR_FLAG
|
|
mov _width, eax
|
|
|
|
xor ebx, ebx ; ebx = nf_fqty (converted to 32-bits)
|
|
mov bl, nf_fqty
|
|
|
|
mov eax, nf_width ; SrcWidth = nf_width*nf_fqty;
|
|
mul ebx ;nf_fqty
|
|
mov SrcWidth, eax
|
|
imul eax, (SHEIGHT-1) ; SrcWidth7 = SrcWidth * (SHEIGHT-1)
|
|
mov SrcWidth7, eax
|
|
add eax, SrcWidth ; SrcLineStep = SrcWidth*SHEIGHT-_width
|
|
sub eax, _width
|
|
mov SrcLineStep, eax
|
|
|
|
mov eax, sf_LineWidth ; DstWidth = sf_LineWidth*nf_fqty;
|
|
mul ebx ;nf_fqty
|
|
mov DstWidth, eax
|
|
imul eax, (SHEIGHT-1) ; DstWidth7 = DstWidth * (SHEIGHT-1)
|
|
mov DstWidth7, eax
|
|
;Note: DstLineStep1+2 = DstWidth*SHEIGHT - _width = ????Not True!!!
|
|
dec eax ; DstLineStep1 = DstWidth*(SHEIGHT-1)-1
|
|
mov DstLineStep1, eax
|
|
|
|
mov eax, DstWidth ; DstLineStep2 = DstWidth-_width+1
|
|
sub eax, _width
|
|
inc eax
|
|
mov DstLineStep2, eax
|
|
|
|
mov eax, DstLineStep1 ; LineEnd = DstWidth*(SHEIGHT-1)+_width-1
|
|
add eax, _width
|
|
mov LineEnd, eax
|
|
|
|
; esi = buf (pointer into buf)
|
|
; ebx = pointer into chgs
|
|
; dx = current chgs word
|
|
; edi = pointer into screen
|
|
; ecx = remaining square lines to copy
|
|
|
|
.if prvbuf ; buf = prvbuf ? nf_buf_prv : nf_buf_cur
|
|
mov esi, nf_buf_prv
|
|
.else
|
|
mov esi, nf_buf_cur
|
|
.endif
|
|
mov eax, y ; + y*SHEIGHT*nf_WIDTH
|
|
shl eax, LOG2_SHEIGHT
|
|
mul nf_width
|
|
add esi, eax
|
|
mov eax, x ; + x*SWIDTH*HI_COLOR_SCALE
|
|
shl eax, LOG2_SWIDTH+HI_COLOR_FLAG
|
|
add esi, eax
|
|
|
|
; dstx must be a multiple of 4 because everything is done on 32-bit words
|
|
; and bank crossing checks don't check for a crossing within a word.
|
|
and dstx, NOT 3 ; dstx &= ~3
|
|
|
|
mov ebx, chgs
|
|
mov dx, 0
|
|
|
|
mov cl, nf_fqty
|
|
nxtfld: push ecx
|
|
push esi
|
|
|
|
mov ecx, h
|
|
|
|
push ebx
|
|
push edx
|
|
|
|
mov eax, sf_LineWidth
|
|
mul dsty
|
|
add eax, dstx
|
|
; bank = start / WinGran
|
|
; dst = (start % WinGran) + sf_WriteWinPtr
|
|
mov edx, 0
|
|
div sf_WinGran
|
|
mov bank, eax
|
|
mov edi, edx
|
|
add edi, sf_WriteWinPtr
|
|
|
|
; Select new bank
|
|
cmp sf_SetBank, 0
|
|
jz nobank
|
|
mov bh, 0
|
|
mov bl, byte ptr sf_WriteWin
|
|
mov edx, bank
|
|
call sf_SetBank
|
|
; eax/edx destroyed by sf_SetBank
|
|
nobank: pop edx
|
|
pop ebx
|
|
|
|
NextLine:
|
|
push ecx
|
|
|
|
mov eax, edi
|
|
add eax, LineEnd ; (SHEIGHT-1)*DstWidth+_width-1
|
|
sub eax, sf_WriteWinLimit
|
|
jb NoSplit
|
|
jmp Split
|
|
|
|
LineDone:
|
|
pop ecx
|
|
add esi, SrcLineStep ; Move back to start column, down SHEIGHT
|
|
add edi, DstLineStep1 ; First advance to last byte
|
|
add edi, DstLineStep2 ; Then advance to new start
|
|
loop NextLine
|
|
pop esi
|
|
pop ecx
|
|
add esi, nf_width
|
|
inc dsty
|
|
dec cl
|
|
jnz nxtfld
|
|
jmp Finished
|
|
|
|
; --- Copy full squares ---
|
|
|
|
; Scan over contiguous unchanged squares up to max per line
|
|
; For each unchanged square, add 8 (SWIDTH) to esi and edi.
|
|
|
|
; count # of contiguous changed squares up to max per line
|
|
; Init eax to 0, ebx and edx to line steps for source and dest.
|
|
; For each square, add 2 (SWIDTH/4) to eax and subtract 8 (SWIDTH)
|
|
; from ebx and edx.
|
|
|
|
NoSplit:
|
|
mov ecx, w
|
|
mov eax, 0
|
|
jmp fTest1
|
|
|
|
fNext1: mov dx, [ebx]
|
|
add ebx, 2
|
|
fTest1: add dx, dx
|
|
jz fNext1
|
|
jb fChgd2
|
|
add esi, SWIDTH*HI_COLOR_SCALE
|
|
add edi, SWIDTH*HI_COLOR_SCALE
|
|
loop fTest1
|
|
jmp LineDone
|
|
|
|
fNext2: mov dx, word ptr [ebx]
|
|
add ebx, 2
|
|
fTest2: add dx, dx
|
|
ja fCopy3
|
|
jz fNext2
|
|
fChgd2: add eax, SWIDTH*HI_COLOR_SCALE/4
|
|
loop fTest2
|
|
call fCopy
|
|
jmp LineDone
|
|
|
|
fCopy3: call fCopy
|
|
add esi, SWIDTH*HI_COLOR_SCALE
|
|
add edi, SWIDTH*HI_COLOR_SCALE
|
|
mov eax, 0
|
|
loop fTest1
|
|
jmp LineDone
|
|
|
|
fCopy: push ebx
|
|
push ecx
|
|
push edx
|
|
|
|
mov ecx, eax
|
|
shl ecx, 2
|
|
mov ebx, SrcWidth
|
|
sub ebx, ecx
|
|
mov edx, DstWidth
|
|
sub edx, ecx
|
|
|
|
REPEAT 7
|
|
mov ecx, eax
|
|
rep movsd
|
|
add esi, ebx
|
|
add edi, edx
|
|
ENDM
|
|
mov ecx, eax
|
|
rep movsd
|
|
|
|
sub esi, SrcWidth7
|
|
sub edi, DstWidth7
|
|
|
|
pop edx
|
|
pop ecx
|
|
pop ebx
|
|
retn
|
|
|
|
|
|
; --- Copy squares across bank boundary ---
|
|
; (occurs infrequently, but should be streamlined as much as possible
|
|
; because it could potentially be much more expensive than normal
|
|
; operation).
|
|
; HMMM... 16*640 = 10240 = approx 1/6 64K, so for 640x480,
|
|
; roughly 1 in 6 square lines will need special processing
|
|
; (actually, 2 in 12 due to interlacing, but that's the same ratio).
|
|
|
|
; Repeat above twice, once for end of cur bank, once for start
|
|
; of next bank, with following modifications:
|
|
|
|
; For end of cur bank...
|
|
; if di>=WinSize, we're done with squares (but need to correctly
|
|
; adjust si and di!)
|
|
; if di+4*cx>WinSize, we need to clip and then we're done
|
|
; (but need to correctly adjust si and di!)
|
|
; Reduce cx to (WinSize-di)/4.
|
|
|
|
; limit=WriteWinLimit-4*ax
|
|
|
|
Split: push ebx
|
|
push edx
|
|
push esi
|
|
push edi
|
|
|
|
mov ecx, w
|
|
mov eax, 0
|
|
jmp aTest1
|
|
|
|
aNext1: mov dx, word ptr [ebx]
|
|
add ebx, 2
|
|
aTest1: add dx, dx
|
|
jz aNext1
|
|
jb aChgd2
|
|
add esi, SWIDTH*HI_COLOR_SCALE
|
|
add edi, SWIDTH*HI_COLOR_SCALE
|
|
loop aTest1
|
|
jmp aDone
|
|
|
|
aNext2: mov dx, [ebx]
|
|
add ebx, 2
|
|
aTest2: add dx, dx
|
|
ja aCopy3
|
|
jz aNext2
|
|
aChgd2: add eax, SWIDTH*HI_COLOR_SCALE/4
|
|
loop aTest2
|
|
call aCopy
|
|
jmp aDone
|
|
|
|
|
|
aCopy3: call aCopy
|
|
add esi, SWIDTH*HI_COLOR_SCALE
|
|
add edi, SWIDTH*HI_COLOR_SCALE
|
|
mov eax, 0
|
|
loop aTest1
|
|
jmp aDone
|
|
|
|
aCopy: push ebx
|
|
push ecx
|
|
push edx
|
|
push esi
|
|
push edi
|
|
|
|
mov ecx, eax
|
|
shl ecx, 2
|
|
mov ebx, SrcWidth
|
|
sub ebx, ecx
|
|
mov edx, DstWidth
|
|
sub edx, ecx
|
|
sub ecx, sf_WriteWinLimit
|
|
neg ecx
|
|
mov limit, ecx
|
|
|
|
REPEAT 7
|
|
cmp edi, limit
|
|
jns aFinal
|
|
mov ecx, eax
|
|
rep movsd
|
|
add esi, ebx
|
|
add edi, edx
|
|
ENDM
|
|
cmp edi, limit
|
|
jns aFinal
|
|
mov ecx, eax
|
|
jmp aLast
|
|
|
|
aFinal: mov ecx, sf_WriteWinLimit
|
|
sub ecx, edi
|
|
js aCpyDn
|
|
shr ecx, 2
|
|
aLast: rep movsd
|
|
|
|
aCpyDn: pop edi
|
|
pop esi
|
|
mov ecx, eax
|
|
shl ecx, 2
|
|
add esi, ecx
|
|
add edi, ecx
|
|
pop edx
|
|
pop ecx
|
|
pop ebx
|
|
retn
|
|
|
|
aDone: pop edi
|
|
pop esi
|
|
|
|
; Advance bank
|
|
mov eax, sf_WinGranPerSize
|
|
add bank, eax
|
|
sub edi, sf_WinSize
|
|
|
|
; Select new bank
|
|
cmp sf_SetBank, 0
|
|
jz nobank2
|
|
mov bh, 0
|
|
mov bl, byte ptr sf_WriteWin
|
|
mov edx, bank
|
|
call sf_SetBank
|
|
; eax/edx destroyed by SetBank
|
|
nobank2:
|
|
pop edx
|
|
pop ebx
|
|
|
|
; For start of next bank...
|
|
; While di+4*cx<=0, advance si & di by src/dst line step instead of
|
|
; doing rep mov
|
|
; If di<0, cx += di/4, si-=di, di=0
|
|
; Do remaining rep mov's (first with modified args, remainder with
|
|
; with full args).
|
|
|
|
; Init bx/dx to src/dst line steps.
|
|
; limit=-4*ax
|
|
|
|
mov ecx, w
|
|
mov eax, 0
|
|
jmp bTest1
|
|
|
|
bNext1: mov dx, [ebx]
|
|
add ebx, 2
|
|
bTest1: add dx, dx
|
|
jz bNext1
|
|
jb bChgd2
|
|
add esi, SWIDTH*HI_COLOR_SCALE
|
|
add edi, SWIDTH*HI_COLOR_SCALE
|
|
loop bTest1
|
|
jmp LineDone
|
|
|
|
bNext2: mov dx, [ebx]
|
|
add ebx, 2
|
|
bTest2: add dx, dx
|
|
ja bCopy3
|
|
jz bNext2
|
|
bChgd2: add eax, SWIDTH*HI_COLOR_SCALE/4
|
|
loop bTest2
|
|
call bCopy
|
|
jmp LineDone
|
|
|
|
|
|
bCopy3: call bCopy
|
|
add esi, SWIDTH*HI_COLOR_SCALE
|
|
add edi, SWIDTH*HI_COLOR_SCALE
|
|
mov eax, 0
|
|
loop bTest1
|
|
jmp LineDone
|
|
|
|
bCopy: push ebx
|
|
push ecx
|
|
push edx
|
|
|
|
mov ecx, eax
|
|
shl ecx, 2
|
|
neg ecx
|
|
mov limit, ecx
|
|
mov ebx, SrcWidth
|
|
mov edx, DstWidth
|
|
|
|
sub edi, sf_WriteWinPtr
|
|
|
|
FOR bMovN, <bMov1,bMov2,bMov3,bMov4,bMov5,bMov6,bMov7>
|
|
mov ecx, offset bMovN
|
|
jns bFull
|
|
cmp limit, edi
|
|
js bPart
|
|
add esi, ebx
|
|
add edi, edx
|
|
ENDM
|
|
|
|
mov ecx, offset bMov8
|
|
jns bFull
|
|
cmp limit, edi
|
|
js bPart
|
|
|
|
add edi, sf_WriteWinPtr
|
|
|
|
shl eax, 2
|
|
add esi, eax
|
|
add edi, eax
|
|
|
|
jmp bCpyDn
|
|
|
|
|
|
bFull: push ecx
|
|
mov ecx, eax
|
|
add ebx, limit
|
|
add edx, limit
|
|
add edi, sf_WriteWinPtr
|
|
retn
|
|
|
|
bPart: push ecx
|
|
mov ecx, eax
|
|
sub esi, edi
|
|
sar edi, 2
|
|
add ecx, edi
|
|
mov edi, sf_WriteWinPtr
|
|
add ebx, limit
|
|
add edx, limit
|
|
retn
|
|
|
|
|
|
FOR bMovN, <bMov1,bMov2,bMov3,bMov4,bMov5,bMov6,bMov7>
|
|
bMovN: rep movsd
|
|
mov ecx, eax
|
|
add esi, ebx
|
|
add edi, edx
|
|
ENDM
|
|
bMov8: rep movsd
|
|
|
|
bCpyDn: sub esi, SrcWidth7
|
|
sub edi, DstWidth7
|
|
|
|
pop edx
|
|
pop ecx
|
|
pop ebx
|
|
retn
|
|
|
|
Finished:
|
|
ret
|
|
|
|
ENDM ; SHOW_FRAME_CHG_BODY
|
|
|
|
;void
|
|
;mve_sfShowFrameChg(
|
|
; bool prvbuf,
|
|
; unsigned x, unsigned y, unsigned w, unsigned h,
|
|
; unsigned short *chgs,
|
|
; unsigned dstx, unsigned dsty)
|
|
;
|
|
; mve_sfShowFrameChg -- non-HiColor entry point.
; The whole body, including the final `ret`, is produced by expanding
; SHOW_FRAME_CHG_BODY with HI_COLOR_FLAG = 0.  The LOCALs below are
; work variables that the macro body refers to by name (line steps,
; row strides, current bank, clipping limit); their order is kept
; unchanged so the frame layout is the same as before.
; USES makes the assembler save/restore ESI, EDI and EBX.
mve_sfShowFrameChg PROC USES ESI EDI EBX, \
        prvbuf:DWORD, x:DWORD, y:DWORD, \
        w:DWORD, h:DWORD, chgs:PTRWORD, \
        dstx:DWORD, dsty:DWORD
        LOCAL   _width:DWORD, SrcWidth:DWORD, DstWidth:DWORD
        LOCAL   SrcWidth7:DWORD, DstWidth7:DWORD
        LOCAL   SrcLineStep:DWORD, DstLineStep1:DWORD, DstLineStep2:DWORD
        LOCAL   LineEnd:DWORD, bank:DWORD, limit:DWORD

        SHOW_FRAME_CHG_BODY 0           ; HI_COLOR_FLAG = 0 (not HiColor)

mve_sfShowFrameChg ENDP
|
|
|
|
|
|
if HICOLOR
|
|
|
|
;void
|
|
;mve_sfHiColorShowFrameChg(
|
|
; bool prvbuf,
|
|
; unsigned x, unsigned y, unsigned w, unsigned h,
|
|
; unsigned short *chgs,
|
|
; unsigned dstx, unsigned dsty)
|
|
;
|
|
; mve_sfHiColorShowFrameChg -- HiColor entry point.
; Identical frame to mve_sfShowFrameChg; the body (and its `ret`) comes
; from expanding SHOW_FRAME_CHG_BODY with HI_COLOR_FLAG = 1.
; The LOCALs are work variables the macro body refers to by name; their
; order is kept unchanged so the frame layout is the same as before.
; USES makes the assembler save/restore ESI, EDI and EBX.
mve_sfHiColorShowFrameChg PROC USES ESI EDI EBX, \
        prvbuf:DWORD, x:DWORD, y:DWORD, \
        w:DWORD, h:DWORD, chgs:PTRWORD, \
        dstx:DWORD, dsty:DWORD
        LOCAL   _width:DWORD, SrcWidth:DWORD, DstWidth:DWORD
        LOCAL   SrcWidth7:DWORD, DstWidth7:DWORD
        LOCAL   SrcLineStep:DWORD, DstLineStep1:DWORD, DstLineStep2:DWORD
        LOCAL   LineEnd:DWORD, bank:DWORD, limit:DWORD

        SHOW_FRAME_CHG_BODY 1           ; HI_COLOR_FLAG = 1 (HiColor)

mve_sfHiColorShowFrameChg ENDP
|
|
|
|
|
|
endif ;HICOLOR
|
|
|
|
endif ;PARTIAL
|
|
|
|
|
|
;----------------------------------------------------------------------
|
|
|
|
if 0 ; No supported
|
|
if PKDATA
|
|
|
|
; PK_SHOW_FRAME_CHG_BODY HI_COLOR_FLAG
;
; Body shared by the packed-opcode "show changed squares" procedures.
; Expands to a complete procedure body (ending in `ret`) and expects the
; enclosing PROC to declare:
;   params: prvbuf, x, y, w, h, ops, dstx, dsty
;   locals: _width, SrcWidth, DstWidth, SrcWidth7, DstWidth7,
;           SrcLineStep, DstLineStep1, DstLineStep2, LineEnd, bank, limit
;
; HI_COLOR_FLAG is 0 or 1; HI_COLOR_SCALE = HI_COLOR_FLAG+1 scales the
; byte width of a square.
;
; `ops` holds one byte per PAIR of squares: a non-zero low nibble marks
; the first square of the pair as changed, a non-zero high nibble marks
; the second.  Runs of changed squares are copied with rep movsd,
; SHEIGHT rows per run.  w is halved to get the pair count, so it is
; expected to be even.
;
; A square line that may reach past sf_WriteWinLimit is handled in two
; passes ("a": rows/bytes that fit in the current bank, "b": the rest
; after switching bank); both passes rescan the same ops for that line.
;
PK_SHOW_FRAME_CHG_BODY MACRO HI_COLOR_FLAG:REQ

        LOCAL HI_COLOR_SCALE
HI_COLOR_SCALE  equ HI_COLOR_FLAG+1

        mov     eax, w                  ; _width = w*SWIDTH*HI_COLOR_SCALE;
        shl     eax, LOG2_SWIDTH+HI_COLOR_FLAG
        mov     _width, eax

        xor     ebx, ebx                ; ebx = nf_fqty (converted to 32-bits)
        mov     bl, nf_fqty

        mov     eax, nf_width           ; SrcWidth = nf_width*nf_fqty;
        mul     ebx                     ;nf_fqty
        mov     SrcWidth, eax
        imul    eax, (SHEIGHT-1)        ; SrcWidth7 = SrcWidth * (SHEIGHT-1)
        mov     SrcWidth7, eax
        add     eax, SrcWidth           ; SrcLineStep = SrcWidth*SHEIGHT-_width
        sub     eax, _width
        mov     SrcLineStep, eax

        mov     eax, sf_LineWidth       ; DstWidth = sf_LineWidth*nf_fqty;
        mul     ebx                     ;nf_fqty
        mov     DstWidth, eax
        imul    eax, (SHEIGHT-1)        ; DstWidth7 = DstWidth * (SHEIGHT-1)
        mov     DstWidth7, eax
        ;Note: DstLineStep1+2 = DstWidth*SHEIGHT - _width = ????Not True!!!
        dec     eax                     ; DstLineStep1 = DstWidth*(SHEIGHT-1)-1
        mov     DstLineStep1, eax

        mov     eax, DstWidth           ; DstLineStep2 = DstWidth-_width+1
        sub     eax, _width
        inc     eax
        mov     DstLineStep2, eax

        mov     eax, DstLineStep1       ; LineEnd = DstWidth*(SHEIGHT-1)+_width-1
        add     eax, _width
        mov     LineEnd, eax

        ; Register roles from here on:
        ;   esi = buf (pointer into buf)
        ;   ebx = pointer into ops
        ;   dx  = temp for current op.  dl xor dh keeps just upper nibble op.
        ;   edi = pointer into screen
        ;   ecx = remaining square lines to copy

        .if prvbuf                      ; buf = prvbuf ? nf_buf_prv : nf_buf_cur
          mov   esi, nf_buf_prv
        .else
          mov   esi, nf_buf_cur
        .endif
        mov     eax, y                  ; + y*SHEIGHT*nf_width
        shl     eax, LOG2_SHEIGHT
        mul     nf_width
        add     esi, eax
        mov     eax, x                  ; + x*SWIDTH*HI_COLOR_SCALE
        shl     eax, LOG2_SWIDTH+HI_COLOR_FLAG
        add     esi, eax

        ; dstx must be a multiple of 4 because everything is done on 32-bit
        ; words and bank crossing checks don't check for a crossing within
        ; a word.
        and     dstx, NOT 3             ; dstx &= ~3

        mov     ebx, ops

        ; One iteration per field (nf_fqty of them); only cl is significant
        ; in the saved counter.
        mov     cl, nf_fqty
nxtfld: push    ecx
        push    esi

        mov     ecx, h

        push    ebx                     ; bx is needed for the SetBank call

        mov     eax, sf_LineWidth       ; start = sf_LineWidth*dsty + dstx
        mul     dsty
        add     eax, dstx
        ; bank = start / WinGran
        ; dst = (start % WinGran) + sf_WriteWinPtr
        mov     edx, 0
        div     sf_WinGran
        mov     bank, eax
        mov     edi, edx
        add     edi, sf_WriteWinPtr

        ; Select new bank (skipped when no SetBank routine is installed)
        cmp     sf_SetBank, 0
        jz      nobank
        mov     bh, 0
        mov     bl, byte ptr sf_WriteWin
        mov     edx, bank
        call    sf_SetBank
        ; eax/edx destroyed by sf_SetBank
nobank: pop     ebx

NextLine:
        push    ecx                     ; popped again at LineDone

        ; Does the last byte of this square line still lie inside the
        ; current write window?
        mov     eax, edi
        add     eax, LineEnd            ; (SHEIGHT-1)*DstWidth+_width-1
        sub     eax, sf_WriteWinLimit
        jb      NoSplit
        jmp     Split

LineDone:
        pop     ecx
        add     esi, SrcLineStep        ; Move back to start column, down SHEIGHT
        add     edi, DstLineStep1       ; First advance to last byte
        add     edi, DstLineStep2       ; Then advance to new start
        loop    NextLine
        pop     esi
        pop     ecx
        add     esi, nf_width
        inc     dsty
        dec     cl
        jnz     nxtfld
        jmp     Finished

        ; --- Copy full squares ---

        ; Scan over contiguous unchanged squares up to max per line.
        ; For each unchanged square, add SWIDTH*HI_COLOR_SCALE to esi and edi.

        ; Count # of contiguous changed squares up to max per line.
        ; eax accumulates the run width in dwords
        ; (SWIDTH*HI_COLOR_SCALE/4 per square); fCopy then copies the run.

NoSplit:
        mov     ecx, w                  ; ecx = number of op bytes (square pairs)
        shr     ecx, 1
        mov     eax, 0
fNext1: mov     dl, [ebx]
        inc     ebx
        mov     dh, dl
        and     dh, 0Fh                 ; dh = op of first square in pair
        jnz     fChgd2a
fTest1a: add    esi, SWIDTH*HI_COLOR_SCALE
        add     edi, SWIDTH*HI_COLOR_SCALE
        xor     dl, dh                  ; dl = op of second square (high nibble)
        jnz     fChgd2b
fTest1b: add    esi, SWIDTH*HI_COLOR_SCALE
        add     edi, SWIDTH*HI_COLOR_SCALE
        dec     ecx
        jnz     fNext1
        jmp     LineDone

fNext2: mov     dl, [ebx]
        inc     ebx
        mov     dh, dl
        and     dh, 0Fh
        jz      fCopy3a
fChgd2a: add    eax, SWIDTH*HI_COLOR_SCALE/4
        xor     dl, dh
        jz      fCopy3b
fChgd2b: add    eax, SWIDTH*HI_COLOR_SCALE/4
        dec     ecx
        jnz     fNext2
        call    fCopy
        jmp     LineDone

fCopy3a: call   fCopy
        xor     eax, eax
        jmp     fTest1a

fCopy3b: call   fCopy
        xor     eax, eax
        jmp     fTest1b

        ; fCopy: copy a run of eax dwords per row for SHEIGHT rows.
        ; On return esi/edi point just past the run on its top row.
        ; Preserves ebx, ecx, edx.
fCopy:  push    ebx
        push    ecx
        push    edx

        mov     ecx, eax
        shl     ecx, 2                  ; run width in bytes
        mov     ebx, SrcWidth           ; ebx/edx = step from end of one row
        sub     ebx, ecx                ;           to start of the next
        mov     edx, DstWidth
        sub     edx, ecx

        REPEAT 7
        mov     ecx, eax
        rep movsd
        add     esi, ebx
        add     edi, edx
        ENDM
        mov     ecx, eax
        rep movsd

        sub     esi, SrcWidth7          ; back up to the top row
        sub     edi, DstWidth7

        pop     edx
        pop     ecx
        pop     ebx
        retn

        ; --- Copy squares across bank boundary ---
        ; (occurs infrequently, but should be streamlined as much as possible
        ; because it could potentially be much more expensive than normal
        ; operation).
        ; HMMM... 16*640 = 10240 = approx 1/6 64K, so for 640x480,
        ; roughly 1 in 6 square lines will need special processing
        ; (actually, 2 in 12 due to interlacing, but that's the same ratio).

        ; Repeat above twice, once for end of cur bank, once for start
        ; of next bank, with following modifications:

        ; For end of cur bank...
        ;   if di>=WinSize, we're done with squares (but need to correctly
        ;     adjust si and di!)
        ;   if di+4*cx>WinSize, we need to clip and then we're done
        ;     (but need to correctly adjust si and di!)
        ;     Reduce cx to (WinSize-di)/4.

        ; limit=WriteWinLimit-4*ax

Split:  push    ebx                     ; restored after the bank switch so the
        push    esi                     ; "b" pass rescans the same ops from
        push    edi                     ; the same source/dest start

        mov     ecx, w
        shr     ecx, 1
        mov     eax, 0
aNext1: mov     dl, [ebx]
        inc     ebx
        mov     dh, dl
        and     dh, 0Fh
        jnz     aChgd2a
aTest1a: add    esi, SWIDTH*HI_COLOR_SCALE
        add     edi, SWIDTH*HI_COLOR_SCALE
        xor     dl, dh
        jnz     aChgd2b
aTest1b: add    esi, SWIDTH*HI_COLOR_SCALE
        add     edi, SWIDTH*HI_COLOR_SCALE
        dec     ecx
        jnz     aNext1
        ; Must leave through aDone (not LineDone): ebx/esi/edi are still on
        ; the stack and the second-bank pass has not run yet.
        jmp     aDone

aNext2: mov     dl, [ebx]
        inc     ebx
        mov     dh, dl
        and     dh, 0Fh
        jz      aCopy3a
aChgd2a: add    eax, SWIDTH*HI_COLOR_SCALE/4
        xor     dl, dh
        jz      aCopy3b
aChgd2b: add    eax, SWIDTH*HI_COLOR_SCALE/4
        dec     ecx
        jnz     aNext2
        call    aCopy
        jmp     aDone                   ; see note above: not LineDone

aCopy3a: call   aCopy
        xor     eax, eax
        jmp     aTest1a

aCopy3b: call   aCopy
        xor     eax, eax
        jmp     aTest1b

        ; aCopy: like fCopy, but stops at sf_WriteWinLimit.  Rows that fit
        ; entirely are copied in full; the first row that does not is
        ; clipped to the bytes left in the window, and later rows are
        ; skipped.  esi/edi are restored and advanced past the run.
aCopy:  push    ebx
        push    ecx
        push    edx
        push    esi
        push    edi

        mov     ecx, eax
        shl     ecx, 2
        mov     ebx, SrcWidth
        sub     ebx, ecx
        mov     edx, DstWidth
        sub     edx, ecx
        sub     ecx, sf_WriteWinLimit
        neg     ecx
        mov     limit, ecx              ; limit = WriteWinLimit - run bytes

        REPEAT 7
        cmp     edi, limit
        jns     aFinal
        mov     ecx, eax
        rep movsd
        add     esi, ebx
        add     edi, edx
        ENDM
        cmp     edi, limit
        jns     aFinal
        mov     ecx, eax
        jmp     aLast

aFinal: mov     ecx, sf_WriteWinLimit   ; dwords left before the window end
        sub     ecx, edi
        js      aCpyDn                  ; already past the end: nothing to copy
        shr     ecx, 2
aLast:  rep movsd

aCpyDn: pop     edi
        pop     esi
        mov     ecx, eax
        shl     ecx, 2
        add     esi, ecx
        add     edi, ecx
        pop     edx
        pop     ecx
        pop     ebx
        retn

aDone:  pop     edi
        pop     esi

        ; Advance bank
        mov     eax, sf_WinGranPerSize
        add     bank, eax
        sub     edi, sf_WinSize

        ; Select new bank
        cmp     sf_SetBank, 0
        jz      nobank2
        mov     bh, 0
        mov     bl, byte ptr sf_WriteWin
        mov     edx, bank
        call    sf_SetBank
        ; eax/edx destroyed by SetBank
nobank2:
        pop     ebx

        ; For start of next bank...
        ;   While di+4*cx<=0, advance si & di by src/dst line step instead of
        ;     doing rep mov
        ;   If di<0, cx += di/4, si-=di, di=0
        ;   Do remaining rep mov's (first with modified args, remainder
        ;     with full args).

        ; Init bx/dx to src/dst line steps.
        ; limit=-4*ax

        mov     ecx, w
        shr     ecx, 1
        mov     eax, 0
bNext1: mov     dl, [ebx]
        inc     ebx
        mov     dh, dl
        and     dh, 0Fh
        jnz     bChgd2a
bTest1a: add    esi, SWIDTH*HI_COLOR_SCALE
        add     edi, SWIDTH*HI_COLOR_SCALE
        xor     dl, dh
        jnz     bChgd2b
bTest1b: add    esi, SWIDTH*HI_COLOR_SCALE
        add     edi, SWIDTH*HI_COLOR_SCALE
        dec     ecx
        jnz     bNext1
        jmp     LineDone

bNext2: mov     dl, [ebx]
        inc     ebx
        mov     dh, dl
        and     dh, 0Fh
        jz      bCopy3a
bChgd2a: add    eax, SWIDTH*HI_COLOR_SCALE/4
        xor     dl, dh
        jz      bCopy3b
bChgd2b: add    eax, SWIDTH*HI_COLOR_SCALE/4
        dec     ecx
        jnz     bNext2
        call    bCopy
        jmp     LineDone

bCopy3a: call   bCopy
        xor     eax, eax
        jmp     bTest1a

bCopy3b: call   bCopy
        xor     eax, eax
        jmp     bTest1b

        ; bCopy: second-bank counterpart of aCopy.  edi is made relative to
        ; sf_WriteWinPtr; rows that lie wholly before the window start are
        ; skipped, the first row straddling the start is copied partially
        ; (bPart), and from the first row at/after the start everything is
        ; copied in full (bFull).  bFull/bPart enter the unrolled copy
        ; chain at the matching bMovN by pushing its address and executing
        ; retn.  The jns tests use the sign flag left by the preceding
        ; sub/add on edi, so instruction order here is significant.
bCopy:  push    ebx
        push    ecx
        push    edx

        mov     ecx, eax
        shl     ecx, 2
        neg     ecx
        mov     limit, ecx              ; limit = -(run bytes)
        mov     ebx, SrcWidth
        mov     edx, DstWidth

        sub     edi, sf_WriteWinPtr

        FOR bMovN, <bMov1,bMov2,bMov3,bMov4,bMov5,bMov6,bMov7>
        mov     ecx, offset bMovN
        jns     bFull
        cmp     limit, edi
        js      bPart
        add     esi, ebx
        add     edi, edx
        ENDM

        mov     ecx, offset bMov8
        jns     bFull
        cmp     limit, edi
        js      bPart

        ; Nothing of this run lies in the new bank: just step past it.
        add     edi, sf_WriteWinPtr

        shl     eax, 2
        add     esi, eax
        add     edi, eax

        jmp     bCpyDn

bFull:  push    ecx                     ; "return" address = selected bMovN
        mov     ecx, eax
        add     ebx, limit              ; row steps now exclude the run width
        add     edx, limit
        add     edi, sf_WriteWinPtr
        retn

bPart:  push    ecx                     ; "return" address = selected bMovN
        mov     ecx, eax
        sub     esi, edi                ; skip the source bytes before the window
        sar     edi, 2
        add     ecx, edi                ; fewer dwords for the clipped row
        mov     edi, sf_WriteWinPtr
        add     ebx, limit
        add     edx, limit
        retn

        FOR bMovN, <bMov1,bMov2,bMov3,bMov4,bMov5,bMov6,bMov7>
bMovN:  rep movsd
        mov     ecx, eax
        add     esi, ebx
        add     edi, edx
        ENDM
bMov8:  rep movsd

bCpyDn: sub     esi, SrcWidth7
        sub     edi, DstWidth7

        pop     edx
        pop     ecx
        pop     ebx
        retn

Finished:
        ret

        ENDM                            ; PK_SHOW_FRAME_CHG_BODY
|
|
|
|
;void
|
|
;mve_sfPkShowFrameChg(
|
|
; bool prvbuf,
|
|
; unsigned x, unsigned y, unsigned w, unsigned h,
|
|
; unsigned char *ops,
|
|
; unsigned dstx, unsigned dsty)
|
|
;
|
|
; mve_sfPkShowFrameChg -- packed-opcode variant, non-HiColor.
; The body (and its `ret`) is the expansion of PK_SHOW_FRAME_CHG_BODY
; with HI_COLOR_FLAG = 0.  `ops` is the packed opcode stream the macro
; scans one byte at a time.  The LOCALs are the work variables the macro
; refers to by name; declaration order is unchanged.
; USES makes the assembler save/restore ESI, EDI and EBX.
mve_sfPkShowFrameChg PROC USES ESI EDI EBX, \
        prvbuf:DWORD, x:DWORD, y:DWORD, \
        w:DWORD, h:DWORD, ops:PTRBYTE, \
        dstx:DWORD, dsty:DWORD
        LOCAL   _width:DWORD, SrcWidth:DWORD, DstWidth:DWORD
        LOCAL   SrcWidth7:DWORD, DstWidth7:DWORD
        LOCAL   SrcLineStep:DWORD, DstLineStep1:DWORD, DstLineStep2:DWORD
        LOCAL   LineEnd:DWORD, bank:DWORD, limit:DWORD

        PK_SHOW_FRAME_CHG_BODY 0        ; HI_COLOR_FLAG = 0 (not HiColor)

mve_sfPkShowFrameChg ENDP
|
|
|
|
|
|
if HICOLOR
|
|
|
|
;void
|
|
;mve_sfPkHiColorShowFrameChg(
|
|
; bool prvbuf,
|
|
; unsigned x, unsigned y, unsigned w, unsigned h,
|
|
; unsigned char *ops,
|
|
; unsigned dstx, unsigned dsty)
|
|
;
|
|
; mve_sfPkHiColorShowFrameChg -- packed-opcode variant, HiColor.
; The body (and its `ret`) is the expansion of PK_SHOW_FRAME_CHG_BODY
; with HI_COLOR_FLAG = 1, which doubles the byte width used per square.
; The LOCALs are the work variables the macro refers to by name;
; declaration order is unchanged.
; USES makes the assembler save/restore ESI, EDI and EBX.
mve_sfPkHiColorShowFrameChg PROC USES ESI EDI EBX, \
        prvbuf:DWORD, x:DWORD, y:DWORD, \
        w:DWORD, h:DWORD, ops:PTRBYTE, \
        dstx:DWORD, dsty:DWORD
        LOCAL   _width:DWORD, SrcWidth:DWORD, DstWidth:DWORD
        LOCAL   SrcWidth7:DWORD, DstWidth7:DWORD
        LOCAL   SrcLineStep:DWORD, DstLineStep1:DWORD, DstLineStep2:DWORD
        LOCAL   LineEnd:DWORD, bank:DWORD, limit:DWORD

        PK_SHOW_FRAME_CHG_BODY 1        ; HI_COLOR_FLAG = 1 (HiColor)

mve_sfPkHiColorShowFrameChg ENDP
|
|
|
|
|
|
endif ;HICOLOR
|
|
|
|
endif ;PKDATA
|
|
|
|
endif
|
|
|
|
;---------------------------------------------------------------------
|
|
; Palette Management
|
|
;---------------------
|
|
|
|
;void __cdecl
|
|
;MVE_SetPalette(unsigned char *p, unsigned start, unsigned count)
|
|
;
|
|
; void __cdecl MVE_SetPalette(unsigned char *p, unsigned start, unsigned count)
;
; Writes `count` palette entries, beginning with entry `start`, to the
; VGA DAC.  `p` points at the base of a table of 3-byte entries; the
; bytes for entry `start` are read from p + start*3.
;
;   start >= 256           : nothing is written.
;   count > 256 - start    : count is clamped to 256 - start.
;
; The clamp compares count against the remaining room (256 - start)
; instead of testing start+count > 256, so a very large count can no
; longer wrap the 32-bit sum and slip past the check.
;
; Uses rep outsb, so it relies on the direction flag being clear.
; Clobbers eax, ecx, edx; ESI and EBX are preserved through USES.
;
MVE_SetPalette PROC USES ESI EBX, \
        p:PTRBYTE, start:DWORD, count:DWORD
        mov     eax, start
        mov     ecx, count
        mov     esi, p
        .if eax >= 256                  ; if (start>=256) return;
          ret
        .endif
        mov     ebx, 256                ; ebx = 256-start  (1..256)
        sub     ebx, eax
        .if ecx > ebx                   ; if (count > 256-start)
          mov   ecx, ebx                ;   count = 256-start
        .endif
        lea     esi, [esi+2*eax]        ; p += start*3
        add     esi, eax
        lea     ecx, [ecx+2*ecx]        ; count *= 3  (bytes to send)

        mov     edx, 03c8h              ; DAC Write Index Register
        out     dx, al                  ; Init write index to start
        inc     edx                     ; DAC Data Register (03c9h)
        rep outsb
        ret
MVE_SetPalette ENDP
|
|
|
|
|
|
; If at least 11 palette entries aren't changed, this is more compact
|
|
; than uncompressed 256 entry palette.
|
|
;
|
|
;static void palLoadCompPalette(unsigned char *buf)
|
|
;
|
|
; static void palLoadCompPalette(unsigned char *buf)
;
; Expands a compressed palette from `buf` into pal_tbl.
;
; The 256 entries are handled as 32 groups of 8.  Each group starts with
; one mask byte; bit n (bit 0 first) set means a 3-byte entry follows in
; the stream and is copied to the next pal_tbl slot, bit n clear means
; that slot is left untouched and skipped.
;
; The group counter is loaded into the full ECX.  Loading only CX left
; the upper 16 bits of ECX at whatever the caller had there, and in
; 32-bit code the loop count is taken from all of ECX.
;
; Clobbers eax, ecx; ESI and EDI are preserved through USES.  Uses
; lodsb/movsw/movsb, so it relies on the direction flag being clear.
;
palLoadCompPalette PROC USES ESI EDI, \
        buf: PTRBYTE
        mov     ax, ds                  ; Ensure es==ds for symantec flat mode
        mov     es, ax

        mov     esi, buf
        mov     edi, offset pal_tbl
        mov     ecx, 32                 ; 32 groups of 8 entries

group:  lodsb                           ; al = change mask for this group
        mov     ah, 8                   ; 8 entries per group
entry:  shr     al, 1                   ; CF = flag for the next entry
        jnc     skip
        movsw                           ; changed: copy 3 bytes from stream
        movsb
        jmp     short step
skip:   add     edi, 3                  ; unchanged: leave slot as it is
step:   dec     ah
        jnz     entry

        dec     ecx
        jnz     group

        ret

palLoadCompPalette ENDP
|
|
|
|
;-----------------------------------------------------------------------
|
|
; Graphics
|
|
;----------
|
|
|
|
; gfxMode(mode)
; Loads `mode` into EAX and issues software interrupt 10h.  How the
; value is interpreted (AH = function, AL = argument) is up to the
; interrupt handler -- presumably the video BIOS "set mode" service
; when mode < 256; confirm against the callers.
; EBP, ESI, EDI and EBX are saved/restored around the call via USES.
gfxMode PROC USES EBP ESI EDI EBX, mode:DWORD
        mov     eax, mode
        int     10h
        ret
gfxMode ENDP
|
|
|
|
; gfxLoadCrtc(crtc, chain4, res)
;   crtc   -> table of 18h bytes, one per CRTC register 00h..17h
;   chain4 =  value written to sequencer register 04h (memory mode)
;   res    =  value written to the Misc Output register (03c2h)
;
; Sequence: set the sequencer memory mode, wait for the start of the
; next vertical retrace, then with interrupts off write the Misc Output
; register inside a sequencer synchronous reset, unprotect CRTC
; registers 0-7 and load all 18h CRTC registers from the table.
; Interrupts are unconditionally re-enabled with sti before returning.
; Clobbers eax, ecx, edx; ESI, EDI and EBX are preserved through USES.
gfxLoadCrtc PROC USES ESI EDI EBX, crtc:PTRBYTE, chain4:BYTE, res:BYTE

        ; Sequencer register 04h <- chain4
        mov     ah, chain4
        mov     al, 04h
        mov     edx, 03c4h
        out     dx, ax

        ; Synchronise with the start of vertical retrace
        ; (Input Status #1, bit 3).
        mov     edx, 03dah
vr_off: in      al, dx                  ; wait while a retrace is in progress
        test    al, 8
        jnz     vr_off
vr_on:  in      al, dx                  ; then wait for the next one to begin
        test    al, 8
        jz      vr_on

        cli                             ; no interrupts while reprogramming

        mov     edx, 03c4h              ; sequencer: enter synchronous reset
        mov     eax, 0100h
        out     dx, ax
        mov     al, res                 ; 25/28-MHz, 350/400/480 lines
        mov     edx, 03c2h              ; Misc Output Register
        out     dx, al
        mov     edx, 03c4h              ; sequencer: leave reset
        mov     eax, 0300h
        out     dx, ax

        ; Clear bit 7 of CRTC register 11h so registers 0-7 accept writes.
        mov     esi, crtc
        mov     edx, 03d4h              ; CRTC index port
        mov     ah, [esi+011h]
        and     ah, 07Fh
        mov     al, 011h
        out     dx, ax

        ; Load CRTC registers 00h..17h from the table (index in al,
        ; data in ah, one 16-bit write each).
        xor     ebx, ebx
        mov     ecx, 018h
wr_reg: mov     ah, [esi+ebx]
        mov     al, bl
        out     dx, ax
        inc     bl
        dec     ecx
        jnz     wr_reg

        sti                             ; interrupts back on

        ret
gfxLoadCrtc ENDP
|
|
|
|
; void __cdecl gfxGetCrtc(unsigned char *crtc);
|
|
;
|
|
; void __cdecl gfxGetCrtc(unsigned char *crtc)
; Reads CRTC registers 00h..17h (index port 03d4h, data port 03d5h)
; and stores them in crtc[0..17h].
; Clobbers eax, edx; ESI and EBX are preserved through USES.
gfxGetCrtc PROC USES ESI EBX, crtc:PTRBYTE
        mov     esi, crtc
        mov     edx, 03d4h              ; CRTC index port
        xor     ebx, ebx                ; ebx = register index
rd_reg: mov     al, bl
        out     dx, al                  ; select register
        inc     edx                     ; -> data port
        in      al, dx
        dec     edx                     ; -> back to index port
        mov     [esi+ebx], al
        inc     ebx
        cmp     ebx, 018h
        jb      rd_reg
        ret
gfxGetCrtc ENDP
|
|
|
|
; void __cdecl gfxVres(unsigned char misc, unsigned char *crtc);
|
|
; misc is one of the following:
|
|
; 350: 0x23 | 0x80 (2)
|
|
; 400: 0x23 | 0x40 (1)
|
|
; 480: 0x23 | 0xc0 (3)
|
|
|
|
; Get crtc register specified by crtc_addr into ah.
|
|
; To update register, do out dx,ax
|
|
; GetCrtc crtc_addr
; In:  dx = CRTC index port; crtc_addr = register index (byte operand).
; Out: ah = value read from the data port (dx+1), al = crtc_addr,
;      dx unchanged.  ax is therefore ready for `out dx, ax` once the
;      caller has adjusted ah.
; Only al/ah are written; inc/dec alter the arithmetic flags.
GetCrtc MACRO crtc_addr
|
|
mov al, crtc_addr ; select the register...
|
|
out dx, al
|
|
inc dx ; ...step to the data port
|
|
in al, dx ; read current contents
|
|
dec dx ; back to the index port
|
|
mov ah, al ; value -> ah
|
|
mov al, crtc_addr ; index -> al
|
|
ENDM
|
|
|
|
; void __cdecl gfxVres(unsigned char misc, unsigned char *crtc)
;
; Changes the vertical timing only.
;   misc : only bits 6-7 (the "lines" field) are used; they replace the
;          same field of the current Misc Output value.
;   crtc : CRTC register image; only the vertical registers
;          06h,07h,09h,10h,11h,12h,15h,16h are taken from it.
;
; Waits for the start of a vertical retrace, then runs with interrupts
; off.  Interrupts are unconditionally re-enabled with sti on exit.
; Clobbers eax, edx (and the misc argument slot); EBX preserved by USES.
gfxVres PROC USES EBX, misc:BYTE, crtc:PTRBYTE

        mov     edx, 03dah              ; Input Status #1 register
sync0:  in      al, dx                  ; wait until no retrace is active
        test    al, 8
        jnz     sync0
sync1:  in      al, dx                  ; wait for the next retrace to start
        test    al, 8
        jz      sync1

        cli                             ; turn off all interrupts

        and     misc, 0c0h              ; keep only the lines field

        mov     edx, 03c4h              ; sequencer: set synchronous reset
        mov     eax, 0100h
        out     dx, ax
        mov     edx, 03cch              ; Misc Output Register (read port)
        in      al, dx
        and     al, 03fh                ; keep all but lines field
        or      al, misc                ; 350/400/480 lines
        mov     edx, 03c2h              ; Misc Output Register (write port)
        out     dx, al
        mov     edx, 03c4h              ; sequencer: clear reset
        mov     eax, 0300h
        out     dx, ax

        mov     ebx, crtc               ; desired CRTC image
        mov     edx, 03d4h              ; CRTC address port

        GetCrtc 011h                    ; Vertical Retrace End
        and     ah, 07Fh                ;   unprotect CRTC registers 0-7
        out     dx, ax

        GetCrtc 03h                     ; End Horizontal Blanking
        or      ah, 080h                ;   enable CRTC registers 10-11
        out     dx, ax

        mov     ah, byte ptr [ebx+06h]  ; Vertical Total
        mov     al, 06h
        out     dx, ax

        GetCrtc 07h                     ; Overflow
        and     ah, 010h                ;   keep current line-compare bit
        or      ah, byte ptr [ebx+07h]
        out     dx, ax

        GetCrtc 09h                     ; Maximum Scan Line
        and     ah, 040h                ;   keep current line-compare bit
        or      ah, byte ptr [ebx+09h]
        out     dx, ax

        mov     ah, byte ptr [ebx+010h] ; Vertical Retrace Start
        mov     al, 010h
        out     dx, ax

        GetCrtc 011h                    ; Vertical Retrace End
        and     ah, 070h                ;   (Preserve BW,DVI,CVI)
        or      ah, byte ptr [ebx+011h]
        or      ah, 080h                ;   reprotect registers 0-7
        out     dx, ax

        mov     ah, byte ptr [ebx+012h] ; Vertical Display End
        mov     al, 012h
        out     dx, ax

        mov     ah, byte ptr [ebx+015h] ; Start Vertical Blank
        mov     al, 015h
        out     dx, ax

        ; End Vertical Blank is written whole, with no attempt to keep a
        ; reserved top bit: some SVGA's use 7-bit vbe, others 8-bit vbe!
        mov     ah, byte ptr [ebx+016h]
        mov     al, 016h
        out     dx, ax

        sti                             ; interrupts back on

        ret
gfxVres ENDP
|
|
|
|
; void __cdecl MVE_gfxWaitRetrace(unsigned state);
|
|
;
|
|
; void __cdecl MVE_gfxWaitRetrace(unsigned state)
; Busy-waits on bit 3 of Input Status #1 (03dah):
;   state == 0 : returns once the bit reads 0 (retrace off)
;   state != 0 : returns once the bit reads 1 (retrace on)
; Clobbers eax, edx.
MVE_gfxWaitRetrace PROC state:DWORD
        mov     edx, 03dah              ; Input Status #1 register
        cmp     state, 0
        jne     until_on

until_off:
        in      al, dx                  ; spin while retrace is active
        test    al, 8
        jnz     until_off
        ret

until_on:
        in      al, dx                  ; spin until retrace begins
        test    al, 8
        jz      until_on
        ret

MVE_gfxWaitRetrace ENDP
|
|
|
|
; void __cdecl MVE_gfxSetSplit(unsigned line)
|
|
;
|
|
; void __cdecl MVE_gfxSetSplit(unsigned line)
; Programs the 10-bit CRTC line-compare value from `line`:
;   bits 0-7 -> Line Compare register (18h)
;   bit 8    -> Overflow register (07h), bit 4
;   bit 9    -> Maximum Scan Line register (09h), bit 6
; The write is done right after the start of a vertical retrace.
; Clobbers eax, ecx, edx.
MVE_gfxSetSplit PROC line:DWORD
        mov     edx, 03dah              ; Input Status #1 register
vs_off: in      al, dx                  ; wait for retrace off
        test    al, 8
        jnz     vs_off
vs_on:  in      al, dx                  ; wait for retrace on
        test    al, 8
        jz      vs_on

        mov     edx, 03d4h              ; CRTC address port

        ; Position both overflow bits with one load:
        ;   ch = line bit 9 at bit 6,  cl = line bit 8 at bit 4.
        mov     ecx, line
        shr     ecx, 3
        mov     ch, cl
        and     ch, 040h
        shr     cl, 1
        and     cl, 010h

        GetCrtc 07h                     ; Overflow Register
        and     ah, 0EFh                ; LC8 (mask=10h)
        or      ah, cl
        out     dx, ax

        GetCrtc 09h                     ; Maximum Scan Line Register
        and     ah, 0BFh                ; LC9 (mask=40h)
        or      ah, ch
        out     dx, ax

        mov     ah, byte ptr line       ; low 8 bits
        mov     al, 18h                 ; Line Compare Register
        out     dx, ax

        ret

MVE_gfxSetSplit ENDP
|
|
|
|
;----------------------------------------------------------------------
|
|
|
|
mveliba_end:
|
|
|
|
|
|
END
|