mirror of
https://github.com/kevinbentley/Descent3.git
synced 2025-01-22 19:55:23 +00:00
6c8977caf0
The vast majority of this is fixing up `char *` that should be `const char *` but a handful of other fixes, like potential buffer overflows that GCC noticed, etc, were applied as well. This removes `-Wno-write-strings` from CMakeLists.txt, as it is no longer necessary, as there is no longer a flood of compiler warning spam when building. This does not fix all compiler warnings; there are still a handful, and they are legitimate, but they can be dealt with in a future commit.
3633 lines
68 KiB
NASM
3633 lines
68 KiB
NASM
; mvelibwa.c
|
|
;
|
|
; Interplay Movie (MVE) File Player Library (32-Bit Win95 Version)
|
|
; Assembly Language Components
|
|
; Written by Paul Allen Edelstein
|
|
;
|
|
; (c) 1997 Interplay Productions. All Rights Reserved.
|
|
; This file is confidential and consists of proprietary information
|
|
; of Interplay Productions. This file and associated libraries
|
|
; may not, in whole or in part, be disclosed to third parties,
|
|
; incorporated into any software product which is not being created
|
|
; for Interplay Productions, copied or duplicated in any form,
|
|
; without the prior written permission of Interplay Productions.
|
|
; Further, you may not reverse engineer, decompile or otherwise
|
|
; attempt to derive source code of this material.
|
|
;
|
|
|
|
;;--- Build options (0 = off, 1 = on, unless stated otherwise) ---

ONLYNEW         equ     0       ; Debug aid: disables motion compensation
LOGGING         equ     0       ; Log timing statistics
PARTIAL         equ     1       ; Support for partial updates
PKDATA          equ     1       ; Support for packed data
HICOLOR         equ     1       ; Support for HiColor
INTERP          equ     0       ; Interpolated squares:
                                ;   0: none (4x4x8), 1: generic dither,
                                ;   2: direction dither, 3: blend
COMPOPS         equ     1       ; Compressed opcode table
SCALING         equ     1       ; Scaling support
DECOMPD         equ     0       ; Support for dithered half vertical resolution
TRANS16         equ     1       ; Support for translating 16-bit RGB format

;;--- Constants ---

; Width and height of a section in pixels, followed by the base-2
; logarithm of each (used as shift counts).
SWIDTH          equ     8
SHEIGHT         equ     8

LOG2_SWIDTH     equ     3
LOG2_SHEIGHT    equ     3
|
|
|
|
;;---
|
|
|
|
; ---- Tables owned by the C side -------------------------------------
        extern  pal_tbl                 ; unsigned char  pal_tbl[3*256];
        extern  pal15_tbl               ; unsigned short pal15_tbl[256];
        extern  nf_trans16_lo
        extern  nf_trans16_hi
        extern  snd_8to16               ; short snd_8to16[256];

; ---- Frame buffers ---------------------------------------------------
        extern  nf_buf_cur              ; unsigned char *nf_buf_cur;
        extern  nf_buf_prv              ; unsigned char *nf_buf_prv;

; ---- NextFrame parameters -------------------------------------------
        extern  nf_wqty                 ; unsigned char nf_wqty;  (width/SWIDTH)
        extern  nf_hqty                 ; unsigned char nf_hqty;  (height/SHEIGHT)
        extern  nf_fqty                 ; unsigned char nf_fqty;  number of fields
        extern  nf_hicolor              ; unsigned nf_hicolor;    HiColor (0:none,1:normal,2:swapped)

; ---- Derived quantities ---------------------------------------------
        extern  nf_width                ; unsigned nf_width;      wqty * SWIDTH
        extern  nf_height               ; unsigned nf_height;     hqty * SHEIGHT
        extern  nf_new_line             ; unsigned nf_new_line;   width - SWIDTH
        extern  nf_new_row0             ; unsigned nf_new_row0;   SHEIGHT*width*2-width
        extern  nf_back_right           ; unsigned nf_back_right; (SHEIGHT-1)*width

; ---- Frame parameters -----------------------------------------------
; Portion of the current frame which has been updated and needs to be
; sent to the screen.
        extern  nf_new_x                ; unsigned nf_new_x;
        extern  nf_new_y                ; unsigned nf_new_y;
        extern  nf_new_w                ; unsigned nf_new_w;
        extern  nf_new_h                ; unsigned nf_new_h;

; ---- Global parameter-passing variables (set by the C stubs) - AH ----
        extern  sndDecompM16_dst
        extern  sndDecompM16_src
        extern  sndDecompM16_len
        extern  sndDecompM16_prev
        extern  sndDecompM16_return
        extern  sndDecompS16_dst
        extern  sndDecompS16_src
        extern  sndDecompS16_len
        extern  sndDecompS16_prev
        extern  sndDecompS16_return
        extern  nfHPkDecomp_ops
        extern  nfHPkDecomp_comp
        extern  nfHPkDecomp_x
        extern  nfHPkDecomp_y
        extern  nfHPkDecomp_w
        extern  nfHPkDecomp_h

; Global array of pointers to the code locations that are self-modified
; and therefore need to be made writable - AH
        extern  global_unlock_memory_pointers

; ---- Entry points called by the C stubs - AH -------------------------
        global  _asm_sndDecompM16
        global  _asm_sndDecompS16
        global  _asm_nfPkConfig
        global  _asm_nfHPkDecomp

; Memory "unlock" helper for the self-modifying code - AH
        global  _asm_selfModify
|
|
|
|
SECTION .data

; Copyright notice embedded in the data image.  The strings are
; double-quoted, so NASM stores each "\n" as the two literal characters
; backslash and 'n' rather than a newline; the text is NUL-terminated.
        db      "(c) 1997 Interplay Productions. All Rights Reserved.\n"
        db      "This file is confidential and consists of proprietary information\n"
        db      "of Interplay Productions. This file and associated libraries\n"
        db      "may not, in whole or in part, be disclosed to third parties,\n"
        db      "incorporated into any software product which is not being created\n"
        db      "for Interplay Productions, copied or duplicated in any form,\n"
        db      "without the prior written permission of Interplay Productions.\n"
        db      "Further, you may not reverse engineer, decompile or otherwise\n"
        db      "attempt to derive source code of this material.\n",0

; Scratch variables (these were stack LOCALs in the MASM original).
tbuf            dd      0       ; PTRBYTE: current write position in nf_buf_cur
new_row         dd      0       ; DWORD:   nf_new_row0 - nf_new_w
DiffBufPtrs     dd      0       ; DWORD:   nf_buf_prv - nf_buf_cur

nfpk_back_right dd      0       ; DWORD:   nf_back_right - SWIDTH*2
wcnt            dd      0       ; DWORD:   opcode bytes left in the current row
bcomp           dd      0       ; PTRBYTE: cursor into the near-shift byte stream
|
|
|
|
; Dispatch table indexed by 4-bit opcode.  The parenthesised numbers in
; the original notes are kept as written (they appear to be operand
; byte counts).
nfhpk_OpTbl:
        dd      nf0             ;  0: Prev Same (0)
        dd      nf1             ;  1: No change (and copied to screen) (0)
        dd      nf2             ;  2: Near shift from older part of current buf (1)
        dd      nf3             ;  3: Near shift from newer part of current buf (1)
        dd      nf4             ;  4: Near shift from previous buffer (1)
        dd      nf5             ;  5: Far shift from previous buffer (2)
        dd      nf6             ;  6: Far shift from current buffer (2)
                                ;     [Or if COMPOPS, run of no changes (0)]
        dd      nf7             ;  7: 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
        dd      nf8             ;  8: 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
        dd      nf9             ;  9: 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
                                ;     low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
        dd      nf10            ; 10: 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 4x8x2 (24 bytes)
        dd      nf11            ; 11: 8x8x8 (64 bytes)
        dd      nf12            ; 12: low 4x4x8 (16 bytes)
        dd      nf13            ; 13: 2x2 4x4x0 (ie 2x2x8) (4 bytes)
        dd      nf14            ; 14: 8x8x0 (1 byte)
        dd      nf15            ; 15: mix 8x8x0 (2 bytes)
|
|
|
|
; Row-offset table: entry i holds (signed 8-bit i) * nf_width.
; Zero here; filled in at run time by _asm_nfPkConfig.
nfpk_ShiftY:    times 256 dd 0
|
|
|
|
; Constant tables
|
|
|
|
; Near-shift table, 256 entries of two bytes (x, y):
; x runs -8..7 fastest, y runs -8..7.  Entry i = (i & 15) - 8, (i >> 4) - 8.
nfpk_ShiftP1:

%assign nfsp1_idx 0
%rep 256
        db      (nfsp1_idx & 15) - 8, (nfsp1_idx >> 4) - 8
  %assign nfsp1_idx nfsp1_idx+1
%endrep
|
|
|
|
; Near-shift table for the current buffer, 256 entries of two bytes (x, y).
;  8-bit to right and below in roughly 0:14*nf_width + -14:14 (-3 cases);
;  negated by the caller it means
;  8-bit to left and above in roughly -14:0*nf_width + -14:14 (-3 cases).
;
; Layout:  y = 0..7   : x =   8..14   ( 8 * 7  =  56 entries)
;          y = 8..13  : x = -14..14   ( 6 * 29 = 174 entries)
;          y = 14     : x = -14..11   (          26 entries)
nfpk_ShiftP2:

%assign nfsp2_y 0
%rep 8
  %assign nfsp2_x 8
  %rep 7
        db      nfsp2_x, nfsp2_y
    %assign nfsp2_x nfsp2_x+1
  %endrep
  %assign nfsp2_y nfsp2_y+1
%endrep

%assign nfsp2_y 8
%rep 6
  %assign nfsp2_x -14
  %rep 29
        db      nfsp2_x, nfsp2_y
    %assign nfsp2_x nfsp2_x+1
  %endrep
  %assign nfsp2_y nfsp2_y+1
%endrep

%assign nfsp2_x -14
%rep 26
        db      nfsp2_x, 14
  %assign nfsp2_x nfsp2_x+1
%endrep
|
|
|
|
; Constant tables

; nfhpk_mov4l: low 4x1 in 8x1 (patch +1)
;   mov eax, ebx/ecx
; 16 entries of four bytes.  Byte k of entry i is 0C0h+1 (ecx) when bit k
; of i is set and 0C0h+3 (ebx) when it is clear.
nfhpk_mov4l:

%assign nfm4l_i 0
%rep 16
        db      0C3h - 2*( nfm4l_i       & 1), \
                0C3h - 2*((nfm4l_i >> 1) & 1), \
                0C3h - 2*((nfm4l_i >> 2) & 1), \
                0C3h - 2*((nfm4l_i >> 3) & 1)
  %assign nfm4l_i nfm4l_i+1
%endrep
|
|
|
|
; nfhpk_mov8:
;   8x1 (each two bits select a pair of colors in a reg)
;   low 4x2 in 8x2 (each two bits select a duplicated color in reg)
;   (patch +1)
;   mov ds:[edi+0/4/8/12], ebx/edx/ecx/ebp
; Note: Patched code specifies mov [ebp+0]... instead
;   of mov [edi+0]... to ensure that 8-bit offsets are
;   used by the assembler even for an offset of zero.
;
; 256 entries of four bytes.  Byte k of entry i is 047h + 8*reg, where
; reg is chosen by bits (2k+1):(2k) of i: 0 -> ebx(3), 1 -> edx(2),
; 2 -> ecx(1), 3 -> ebp(5).
%define NFM8_REG(sel)   ((05010203h >> ((sel) * 8)) & 0FFh)

nfhpk_mov8:

%assign nfm8_i 0
%rep 256
  %assign nfm8_b1 047h + 8*NFM8_REG( nfm8_i       & 3)
  %assign nfm8_b2 047h + 8*NFM8_REG((nfm8_i >> 2) & 3)
  %assign nfm8_b3 047h + 8*NFM8_REG((nfm8_i >> 4) & 3)
  %assign nfm8_b4 047h + 8*NFM8_REG((nfm8_i >> 6) & 3)
        db      nfm8_b1, nfm8_b2, nfm8_b3, nfm8_b4
  %assign nfm8_i nfm8_i+1
%endrep
|
|
|
|
; nfhpk_mov4:
;   4x2 (patch +2)
;     mov ax, bx/dx/cx/bp
;   low 4x2 in 8x2 (patch +1)
;     mov eax, ebx/edx/ecx/ebp
;
; 256 entries of four bytes.  Byte k of entry i is 0C0h + reg, where reg
; is chosen by bits (2k+1):(2k) of i: 0 -> ebx(3), 1 -> edx(2),
; 2 -> ecx(1), 3 -> ebp(5).
%define NFM4_REG(sel)   ((05010203h >> ((sel) * 8)) & 0FFh)

nfhpk_mov4:

%assign nfm4_i 0
%rep 256
  %assign nfm4_b1 0C0h + NFM4_REG( nfm4_i       & 3)
  %assign nfm4_b2 0C0h + NFM4_REG((nfm4_i >> 2) & 3)
  %assign nfm4_b3 0C0h + NFM4_REG((nfm4_i >> 4) & 3)
  %assign nfm4_b4 0C0h + NFM4_REG((nfm4_i >> 6) & 3)
        db      nfm4_b1, nfm4_b2, nfm4_b3, nfm4_b4
  %assign nfm4_i nfm4_i+1
%endrep
|
|
|
|
|
|
SEGMENT .text

;--------------------------------------------------------------------
; _asm_selfModify
;   Publishes the start labels of the self-modified code regions by
;   writing them into consecutive dword slots of
;   global_unlock_memory_pointers (nine slots, 36 bytes).
;   In:    nothing
;   Out:   global_unlock_memory_pointers[0..8] filled
;   Clobb: eax (flags are saved and restored)
;--------------------------------------------------------------------
_asm_selfModify:
        pushf

        ; eax = address of the pointer array
        mov     eax, global_unlock_memory_pointers

        ; One entry per patched region of _asm_nfHPkDecomp
        mov     dword [eax + 0*4], nf7_0
        mov     dword [eax + 1*4], nf8_0
        mov     dword [eax + 2*4], nf9_0
        mov     dword [eax + 3*4], nf10_0
        mov     dword [eax + 4*4], nf23_0
        mov     dword [eax + 5*4], nf24_0
        mov     dword [eax + 6*4], nf25_0
        mov     dword [eax + 7*4], nf26_0
        mov     dword [eax + 8*4], nf42_0

        popf
        ret
|
|
|
|
;--------------------------------------------------------------------
|
|
; Sound Management
|
|
;--------------------
|
|
|
|
;--------------------------------------------------------------------
; unsigned sndDecompM16(unsigned short *dst, const unsigned char *src,
;                       unsigned len, unsigned prev);
;
; Decompresses a mono stream containing len samples
; (src is len bytes, dst is len*2 bytes).
; prev is the previous decompression state or zero.
; Returns the new decompression state.
;
; Arguments and result travel through globals:
;   In:    sndDecompM16_dst, sndDecompM16_src, sndDecompM16_len,
;          sndDecompM16_prev
;   Out:   sndDecompM16_return (also left in eax)
;   Clobb: eax, ecx, flags
;   ESI, EDI and EBX are saved and restored, matching the
;   "PROC USES ESI EDI EBX" contract of the MASM original.
;--------------------------------------------------------------------
_asm_sndDecompM16:
        push    esi
        push    edi
        push    ebx

        mov     eax, [sndDecompM16_prev]        ; ax = running sample value

        mov     ecx, [sndDecompM16_len]
        jecxz   .done                           ; nothing to decode

        mov     esi, [sndDecompM16_src]
        mov     edi, [sndDecompM16_dst]

        xor     ebx, ebx                        ; upper bits stay 0 for indexing

.next:
        mov     bl, byte [esi]                  ; next compressed byte
        add     esi, 1
        add     ax, word [snd_8to16 + ebx*2]    ; accumulate table delta
        mov     word [edi], ax                  ; emit 16-bit sample
        add     edi, 2
        dec     ecx
        jnz     .next

.done:
        ; Store our return value - AH
        mov     dword [sndDecompM16_return], eax

        pop     ebx
        pop     edi
        pop     esi
        ret
;sndDecompM16 ENDP
|
|
|
|
;--------------------------------------------------------------------
; unsigned sndDecompS16(unsigned short *dst, const unsigned char *src,
;                       unsigned len, unsigned prev);
;
; Decompresses a stereo stream containing len samples
; (src is len*2 bytes, dst is len*4 bytes).
; prev is the previous decompression state or zero
;   (it encodes the 16-bit states of the two stereo channels
;    in its low and high order 16-bit halves).
; Returns the new decompression state.
;
; Arguments and result travel through globals:
;   In:    sndDecompS16_dst, sndDecompS16_src, sndDecompS16_len,
;          sndDecompS16_prev
;   Out:   sndDecompS16_return (also left in eax)
;   Clobb: eax, ecx, edx, flags
;   ESI, EDI and EBX are saved and restored, matching the
;   "PROC USES ESI EDI EBX" contract of the MASM original.
;--------------------------------------------------------------------
_asm_sndDecompS16:
        push    esi
        push    edi
        push    ebx

        movzx   eax, word [sndDecompS16_prev]   ; ax = first-channel state
        movzx   edx, word [sndDecompS16_prev+2] ; dx = second-channel state

        mov     ecx, [sndDecompS16_len]
        jecxz   .done                           ; nothing to decode

        mov     esi, [sndDecompS16_src]
        mov     edi, [sndDecompS16_dst]

        xor     ebx, ebx                        ; upper bits stay 0 for indexing

.next:
        ; first channel of the pair
        mov     bl, byte [esi]
        add     esi, 1
        add     ax, word [snd_8to16 + ebx*2]
        mov     word [edi], ax
        add     edi, 2

        ; second channel of the pair
        mov     bl, byte [esi]
        add     esi, 1
        add     dx, word [snd_8to16 + ebx*2]
        mov     word [edi], dx
        add     edi, 2

        dec     ecx
        jnz     .next

.done:
        shl     edx, 16                         ; pack: high half = second channel
        or      eax, edx

        ; Store our return value - AH
        mov     dword [sndDecompS16_return], eax

        pop     ebx
        pop     edi
        pop     esi
        ret

;sndDecompS16 ENDP
|
|
|
|
;--------------------------------------------------------------------
|
|
; NextFrame (Video Decompression)
|
|
;----------------------------------
|
|
|
|
;--------------------------------------------------------------------
; NF_DECOMP_INIT hi_color_flag
;   Common set-up for the NextFrame decompressors.
;   %1 (HI_COLOR_FLAG, required) is added to the x/w shift count, so
;   1 doubles the horizontal byte quantities.
;
;   Reads:  nf_buf_prv, nf_buf_cur, nf_fqty, nf_new_row0, nf_width,
;           nfHPkDecomp_x/_y/_w/_h
;   Writes: DiffBufPtrs, nf_new_x, nf_new_w, nf_new_y, nf_new_h,
;           new_row, tbuf
;   Clobb:  eax, ebx, edx (by mul), flags
;
;   The branch targets are macro-local (%%) labels so the macro can be
;   expanded more than once and does not interrupt NASM local-label
;   scoping in the routine that uses it.
;--------------------------------------------------------------------
%macro NF_DECOMP_INIT 1         ; HI_COLOR_FLAG: REQ

        mov     eax, [nf_buf_prv]               ; DiffBufPtrs = nf_buf_prv - nf_buf_cur
        sub     eax, [nf_buf_cur]
        mov     [DiffBufPtrs], eax

        xor     ebx, ebx                        ; ebx = nf_fqty (convert to 32-bits)
        mov     bl, [nf_fqty]

        mov     eax, [nfHPkDecomp_x]            ; nf_new_x = x*SWIDTH*2^HI_COLOR_FLAG
        shl     eax, LOG2_SWIDTH+%1
        mov     [nf_new_x], eax

        mov     eax, [nfHPkDecomp_w]            ; nf_new_w = w*SWIDTH*2^HI_COLOR_FLAG
        shl     eax, LOG2_SWIDTH+%1
        mov     [nf_new_w], eax

        mov     eax, [nfHPkDecomp_y]            ; nf_new_y = y*nf_fqty*SHEIGHT
        shl     eax, LOG2_SHEIGHT
        mul     ebx                             ; * nf_fqty (edx receives the high half)
        mov     [nf_new_y], eax

        mov     eax, [nfHPkDecomp_h]            ; nf_new_h = h*nf_fqty*SHEIGHT
        shl     eax, LOG2_SHEIGHT
        mul     ebx                             ; * nf_fqty
        mov     [nf_new_h], eax

        mov     eax, [nf_new_row0]              ; new_row = nf_new_row0 - nf_new_w
        sub     eax, [nf_new_w]
        mov     [new_row], eax

        ;; Move to correct place in current buffer
        mov     eax, [nf_buf_cur]               ; tbuf = nf_buf_cur
        mov     [tbuf], eax

        ; if (x || y)  -- run-time test added in the NASM port - AH
        cmp     dword [nfHPkDecomp_x], 0
        jne     %%apply_offset
        cmp     dword [nfHPkDecomp_y], 0
        je      %%positioned
%%apply_offset:
        mov     eax, [nf_new_y]                 ; tbuf += nf_new_y*nf_width + nf_new_x
        mul     dword [nf_width]
        add     eax, [nf_new_x]
        add     [tbuf], eax
%%positioned:

%endmacro ; DECOMP_INIT
|
|
|
|
;----------------------------------------------------------------------
|
|
|
|
;--------------------------------------------------------------------
; _asm_nfPkConfig
;   Initializes tables used by nfPkDecomp which are dependent on
;   screen size.  Builds nfpk_ShiftY so that entry i holds
;   (signed 8-bit i) * nf_width:
;     entries   0..127 =    0*w ..  127*w
;     entries 128..255 = -128*w ..   -1*w
;   In:    nf_width
;   Out:   nfpk_ShiftY[0..255]
;   Clobb: eax, ecx, flags
;   EDI and EBX are saved and restored, in line with the
;   "PROC USES ESI EDI EBX" contract of the MASM original.
;--------------------------------------------------------------------
_asm_nfPkConfig:
        push    edi
        push    ebx

        lea     edi, [nfpk_ShiftY]              ; edi = table cursor
        mov     ebx, [nf_width]                 ; ebx = row stride

        ; Non-negative half: 0, w, 2w, ... 127w
        xor     eax, eax
        mov     ecx, 128
.fill_pos:
        mov     [edi], eax
        add     edi, 4
        add     eax, ebx
        dec     ecx
        jne     .fill_pos

        ; Negative half: -128w, -127w, ... -w
        mov     eax, ebx
        shl     eax, 7
        neg     eax
        mov     ecx, 128
.fill_neg:
        mov     [edi], eax
        add     edi, 4
        add     eax, ebx
        dec     ecx
        jne     .fill_neg

        pop     ebx
        pop     edi
        ret
;nfPkConfig ENDP
|
|
|
|
; Trans16_3 dst, idx, mask
;   dst = nf_trans16_lo[byte [idx]] | nf_trans16_hi[byte [idx+1]]
;   dst must be a 16-bit register or operand; idx is an address
;   expression.  The third argument (mask) is accepted but not used by
;   this expansion.
;   Clobb: eax (left holding the zero-extended byte at [idx+1]), flags
%macro Trans16_3 3              ; dst:req, idx:req, mask
        movzx   eax, byte [%2]
        mov     %1, [nf_trans16_lo + eax*2]
        movzx   eax, byte [%2+1]
        or      %1, [nf_trans16_hi + eax*2]
%endmacro
|
|
|
|
; Trans16 dst, idx
;   dst = nf_trans16_lo[byte [idx]] | nf_trans16_hi[byte [idx+1]]
;   dst must be a 16-bit register or operand; idx is an address
;   expression.
;   Clobb: eax (left holding the zero-extended byte at [idx+1]), flags
%macro Trans16 2                ; dst:req, idx:req
        movzx   eax, byte [%2]
        mov     %1, [nf_trans16_lo + eax*2]
        movzx   eax, byte [%2+1]
        or      %1, [nf_trans16_hi + eax*2]
%endmacro
|
|
|
|
;--------------------------------------------------------------------
; _asm_nfHPkDecomp  (HiColor packed-data frame decompressor, dispatcher)
;   MASM original: PROC USES ESI EDI EBX,
;                    ops:PTRBYTE, comp:PTRBYTE,
;                    x:DWORD, y:DWORD, w:DWORD, h:DWORD
;   Arguments arrive through the nfHPkDecomp_* globals.
;
;   Each byte of the ops stream carries two 4-bit opcodes.  The handler
;   for the low nibble is entered first by jmp; the handler for the
;   high nibble and then nf_NextPair are pushed as return addresses, so
;   every handler ends with a plain near return.
;
;   Register roles while decoding:
;     esi = cursor into the comp data stream
;     edi = write position in the current frame buffer
;   Side effects: nfHPkDecomp_ops is advanced and nfHPkDecomp_h is
;   counted down to zero.
;
;   ESI, EDI and EBX are saved and restored to honour the USES contract.
;   A height of zero returns right after the common initialisation; the
;   row counter would otherwise be decremented from 0 to 0FFFFFFFFh and
;   decoding would continue for that many rows.
;--------------------------------------------------------------------
_asm_nfHPkDecomp:
        push    esi
        push    edi
        push    ebx

        NF_DECOMP_INIT 1

        cmp     dword [nfHPkDecomp_h], 0        ; no rows requested?
        je      near _asm_nfHPkDecomp_exit

        mov     eax, [nf_back_right]            ; nfpk_back_right =
        sub     eax, SWIDTH*2                   ;   nf_back_right - SWIDTH*2
        mov     [nfpk_back_right], eax

        mov     esi, [nfHPkDecomp_comp]
        mov     edi, [tbuf]

        ; The first word of comp is the offset (from comp itself) of the
        ; byte stream read through bcomp; the main data follows the word.
        xor     eax, eax
        mov     ax, [esi]
        add     eax, esi
        mov     [bcomp], eax
        add     esi, 2

nf_StartRow:
        mov     eax, [nfHPkDecomp_w]            ; wcnt = w / 2 opcode bytes per row
        shr     eax, 1
        mov     [wcnt], eax
        ALIGN 4
nf_NextPair:
        dec     dword [wcnt]
        js      nf_NextRow                      ; row finished

        mov     ebx, [nfHPkDecomp_ops]          ; al = next opcode pair
        mov     al, [ebx]
        inc     ebx
        mov     [nfHPkDecomp_ops], ebx

        xor     ebx, ebx
        mov     bl, al
        shr     bl, 4                           ; ebx = high nibble (second opcode)
        and     eax, 0Fh                        ; eax = low nibble  (first opcode)
        push    dword nf_NextPair               ; return here after both handlers
        push    dword [nfhpk_OpTbl + ebx*4]     ; second handler runs on first return
        jmp     [nfhpk_OpTbl + eax*4]           ; first handler

nf_NextRow:
        add     edi, [new_row]                  ; step to the next row of sections
        dec     dword [nfHPkDecomp_h]
        jnz     nf_StartRow

_asm_nfHPkDecomp_exit:
        pop     ebx
        pop     edi
        pop     esi
        ret
|
|
|
|
;----------------------------------------
|
|
; nf0: no change from previous buffer.
; Copies the section at the same position from the previous buffer by
; handing nf_shift the distance between the two buffers.
        ALIGN 4
nf0:
        mov     eax, [DiffBufPtrs]      ; eax = nf_buf_prv - nf_buf_cur
        jmp     nf_shift
|
|
|
|
;----------------------------------------
|
|
; nf1: no change (and copied to screen).
; Leaves the section untouched and advances edi by one section width
; (SWIDTH pixels * 2 bytes).
        ALIGN 4
nf1:
        add     edi, SWIDTH*2
        ret
|
|
|
|
;----------------------------------------
|
|
; nf2: near shift from older part of current buffer.
; Fetches one byte through bcomp, looks up its (x, y) pair in
; nfpk_ShiftP2, and falls into nf_xyc_shift.
;
; nf_xyc_shift (also entered from nf3 and nf6):
;   In:  al = signed x displacement in pixels, ah = y byte
;   Out: eax = 2*x + nfpk_ShiftY[y], then jumps to nf_shift
;   Clobb: ebx, flags
        ALIGN 4
nf2:
        mov     ebx, [bcomp]
        movzx   eax, byte [ebx]                 ; eax = table index
        inc     dword [bcomp]
        mov     ax, [nfpk_ShiftP2 + eax*2]      ; al = x, ah = y
nf_xyc_shift:
        movzx   ebx, ah                         ; ebx = y byte (table index)
        movsx   eax, al                         ; eax = sign-extended x
        add     eax, eax                        ; two bytes per pixel
        add     eax, [nfpk_ShiftY + ebx*4]      ; + y * nf_width
        jmp     nf_shift
|
|
|
|
;----------------------------------------
|
|
; nf3: near shift from newer part of current buffer.
; Same lookup as nf2, but both components of the (x, y) pair are negated
; before continuing at nf_xyc_shift.
        ALIGN 4
nf3:
        mov     ebx, [bcomp]
        movzx   eax, byte [ebx]                 ; eax = table index
        inc     dword [bcomp]
        mov     ax, [nfpk_ShiftP2 + eax*2]      ; al = x, ah = y
        neg     al
        neg     ah
        jmp     nf_xyc_shift
|
|
|
|
;----------------------------------------
|
|
; nf4: near shift from previous buffer.
; Fetches one byte through bcomp, looks up its (x, y) pair in
; nfpk_ShiftP1, and continues at nf_xyp_shift.
        ALIGN 4
nf4:
        mov     ebx, [bcomp]
        movzx   eax, byte [ebx]                 ; eax = table index
        inc     dword [bcomp]
        mov     ax, [nfpk_ShiftP1 + eax*2]      ; al = x, ah = y
        jmp     nf_xyp_shift
|
|
|
|
;----------------------------------------
|
|
; nf5: far shift from previous buffer.
; Reads the (x, y) byte pair directly from the data stream at esi.
;
; nf_xyp_shift (also entered from nf4):
;   In:  al = signed x displacement in pixels, ah = y byte
;   Out: eax = 2*x + nfpk_ShiftY[y] + DiffBufPtrs, then jumps to nf_shift
;   Clobb: ebx, flags
        ALIGN 4
nf5:
        mov     ax, [esi]                       ; al = x, ah = y
        add     esi, 2
nf_xyp_shift:
        movzx   ebx, ah                         ; ebx = y byte (table index)
        movsx   eax, al                         ; eax = sign-extended x
        add     eax, eax                        ; two bytes per pixel
        add     eax, [nfpk_ShiftY + ebx*4]      ; + y * nf_width
        add     eax, [DiffBufPtrs]              ; rebase into the previous buffer
        jmp     nf_shift
|
|
|
|
;----------------------------------------
|
|
; nf6: far shift from current buffer.
; Reads the (x, y) byte pair directly from the data stream at esi and
; continues at nf_xyc_shift.
        ALIGN 4

nf6:
        mov     ax, [esi]                       ; al = x, ah = y
        add     esi, 2
        jmp     nf_xyc_shift
|
|
|
|
;----------------------------------------
|
|
; nf_shift: copy one section (SHEIGHT rows of 16 bytes) to edi from
; edi + eax.
;   In:    eax = byte offset from destination to source
;          edi = destination (top-left of the section)
;   Out:   edi = top row again, advanced by one section: the seven row
;          steps are undone by subtracting nfpk_back_right
;          (= nf_back_right - SWIDTH*2, set up by the dispatcher)
;          esi preserved (parked in ebx during the copy)
;   Clobb: eax, ebx, edx, flags

; Copies one 16-byte row from [esi] to [edi] through eax.
%macro NF_SHIFT_COPY16 0
        mov     eax, [esi]
        mov     [edi], eax
        mov     eax, [esi+4]
        mov     [edi+4], eax
        mov     eax, [esi+8]
        mov     [edi+8], eax
        mov     eax, [esi+12]
        mov     [edi+12], eax
%endmacro

        ALIGN 4
nf_shift:

        mov     ebx, esi                ; save esi
        lea     esi, [edi+eax]          ; esi = source
        mov     edx, [nf_width]         ; edx = row stride

%rep 7
        NF_SHIFT_COPY16
        add     esi, edx
        add     edi, edx
%endrep
        NF_SHIFT_COPY16                 ; last row: no stride step

        sub     edi, [nfpk_back_right]
        mov     esi, ebx                ; restore esi

        ret
|
|
|
|
;----------------------------------------
|
|
; nf7: 8x8x1 (12 bytes)
;   Data at esi: two colour words (+0, +2) followed by eight mask bytes
;   (+4..+11), one per row.  If bit 15 of the first colour word is set
;   the block is the low-resolution variant handled by nf23.
;
;   The routine is self-modifying: each mask byte indexes nfhpk_mov8 and
;   the four bytes found there are written over the ModRM bytes (label+1)
;   of the four stores of one row, choosing the source register and
;   turning the [ebp+n] placeholder into [edi+n].
;
;   Clobb: eax, ebx, ecx, edx, flags.  ebp is saved and restored.
;   Out:   esi advanced by 12; edi advanced to the next section.

; NF7_PATCH4 src_off, base, l1, l2, l3, l4
;   Patches the stores at l1..l4 from mask byte [esi+src_off].
;   Expects eax (upper 24 bits zero), ecx = nfhpk_mov8, edx = base+1.
%macro NF7_PATCH4 6
        mov     al, [esi+%1]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(%3-%2)], bl
        mov     [edx+(%4-%2)], bh
        shr     ebx, 16
        mov     [edx+(%5-%2)], bl
        mov     [edx+(%6-%2)], bh
%endmacro

; NF7_ROW l1, l2, l3, l4
;   The four patched stores of one 8-pixel row.
%macro NF7_ROW 4
%1:     mov     [ebp+0], ebx
%2:     mov     [ebp+4], ebx
%3:     mov     [ebp+8], ebx
%4:     mov     [ebp+12], ebx
%endmacro

        ALIGN 4
nf7:
        test    word [esi], 08000h
        jnz     near nf23

        xor     eax, eax

        lea     ecx, [nfhpk_mov8]
        lea     edx, [nf7_11+1]         ; +1: address of the first ModRM byte

        NF7_PATCH4  4, nf7_11, nf7_11, nf7_12, nf7_13, nf7_14
        NF7_PATCH4  5, nf7_11, nf7_21, nf7_22, nf7_23, nf7_24
        NF7_PATCH4  6, nf7_11, nf7_31, nf7_32, nf7_33, nf7_34
        NF7_PATCH4  7, nf7_11, nf7_41, nf7_42, nf7_43, nf7_44

        lea     edx, [edx+(nf7_51-nf7_11)]      ; rebase onto row 5

        NF7_PATCH4  8, nf7_51, nf7_51, nf7_52, nf7_53, nf7_54
        NF7_PATCH4  9, nf7_51, nf7_61, nf7_62, nf7_63, nf7_64
        NF7_PATCH4 10, nf7_51, nf7_71, nf7_72, nf7_73, nf7_74
        NF7_PATCH4 11, nf7_51, nf7_81, nf7_82, nf7_83, nf7_84

        push    ebp
        push    esi

        ; Load ebx,edx,ecx,ebp with the 00,01,10,11 colour combinations
        ; (note that bits are read least significant first).
        Trans16 cx, esi+2
        shl     ecx, 16
        Trans16 cx, esi                 ; ecx = second colour : first colour

        mov     esi, [nf_width]         ; esi = row stride from here on
        mov     edx, ecx

        ror     edx, 16                 ; edx = ecx with its halves swapped
        mov     ebx, edx
        mov     bx, cx                  ; ebx = first colour in both halves
        mov     ebp, ecx
        mov     bp, dx                  ; ebp = second colour in both halves

        jmp     nf7_0                   ; flush prefetch
        ALIGN 4
nf7_0:
        NF7_ROW nf7_11, nf7_12, nf7_13, nf7_14
        add     edi, esi

        NF7_ROW nf7_21, nf7_22, nf7_23, nf7_24
        add     edi, esi

        NF7_ROW nf7_31, nf7_32, nf7_33, nf7_34
        add     edi, esi

        NF7_ROW nf7_41, nf7_42, nf7_43, nf7_44
        add     edi, esi

        NF7_ROW nf7_51, nf7_52, nf7_53, nf7_54
        add     edi, esi

        NF7_ROW nf7_61, nf7_62, nf7_63, nf7_64
        add     edi, esi

        NF7_ROW nf7_71, nf7_72, nf7_73, nf7_74
        add     edi, esi

        NF7_ROW nf7_81, nf7_82, nf7_83, nf7_84

        pop     esi
        pop     ebp
        add     esi, 12                 ; consume the 12 data bytes
        sub     edi, [nfpk_back_right]  ; back to the top row, next section

        ret
|
|
|
|
;----------------------------------------
|
|
; nf23 (nf7+16): low 4x4x1 (6 bytes)
;   Data at esi: two colour words (+0 with bit 15 set, +2) followed by
;   two mask bytes (+4, +5).  Each mask nibble controls one pair of
;   output rows: every bit selects the colour of one dword store, and
;   each store is written to two consecutive rows.
;
;   Self-modifying: each nibble indexes nfhpk_mov4l and the four bytes
;   found there are written over the ModRM bytes (label+1) of the four
;   register loads of a row pair, selecting ebx or ecx as the source.
;
;   The patched loads are emitted as explicit bytes 8Bh,0C3h
;   ("mov eax, ebx" in its 8B /r, reg <- r/m form).  nfhpk_mov4l holds
;   ModRM values 0C3h/0C1h that only mean "eax <- ebx/ecx" with opcode
;   8Bh; NASM assembles the mnemonic "mov eax, ebx" with opcode 89h,
;   for which those ModRM values would instead mean "ebx/ecx <- eax".
;
;   Clobb: eax, ebx, ecx, edx, flags
;   Out:   esi advanced by 6; edi advanced to the next section.

; NF23_PATCH4 l1, l2, l3, l4
;   Patches the loads at l1..l4 from the table entry selected by eax.
;   Expects ecx = nfhpk_mov4l, edx = nf23_11+1.
%macro NF23_PATCH4 4
        mov     ebx, [ecx+eax*4]
        mov     [edx+(%1-nf23_11)], bl
        mov     [edx+(%2-nf23_11)], bh
        shr     ebx, 16
        mov     [edx+(%3-nf23_11)], bl
        mov     [edx+(%4-nf23_11)], bh
%endmacro

; NF23_ROWPAIR l1, l2, l3, l4
;   Four patched loads, each stored to the current row and the row below.
%macro NF23_ROWPAIR 4
%1:     db      08Bh, 0C3h              ; mov eax, ebx  (ModRM byte patched)
        mov     [edi], eax
        mov     [edi+edx], eax
%2:     db      08Bh, 0C3h              ; mov eax, ebx  (ModRM byte patched)
        mov     [edi+4], eax
        mov     [edi+edx+4], eax
%3:     db      08Bh, 0C3h              ; mov eax, ebx  (ModRM byte patched)
        mov     [edi+8], eax
        mov     [edi+edx+8], eax
%4:     db      08Bh, 0C3h              ; mov eax, ebx  (ModRM byte patched)
        mov     [edi+12], eax
        mov     [edi+edx+12], eax
%endmacro

        ALIGN 4
;nf7+16
nf23:                                   ; low 4x4x1 (6 bytes)

        xor     eax, eax
        lea     ecx, [nfhpk_mov4l]
        lea     edx, [nf23_11+1]        ; +1: address of the first ModRM byte

        mov     al, [esi+4]             ; first mask byte, low nibble
        and     al, 0fH
        NF23_PATCH4 nf23_11, nf23_12, nf23_13, nf23_14

        mov     al, [esi+4]             ; first mask byte, high nibble
        shr     al, 4
        NF23_PATCH4 nf23_31, nf23_32, nf23_33, nf23_34

        mov     al, [esi+5]             ; second mask byte, low nibble
        and     al, 0fH
        NF23_PATCH4 nf23_51, nf23_52, nf23_53, nf23_54

        mov     al, [esi+5]             ; second mask byte, high nibble
        shr     al, 4
        NF23_PATCH4 nf23_71, nf23_72, nf23_73, nf23_74

        mov     edx, [nf_width]         ; edx = row stride from here on

        ; Load ebx,ecx with the 00,11 colour combinations
        Trans16_3 cx, esi, 1
        shrd    ebx, ecx, 16
        mov     bx, cx                  ; ebx = first colour in both halves
        Trans16 cx, esi+2
        shrd    eax, ecx, 16
        mov     ax, cx
        mov     ecx, eax                ; ecx = second colour in both halves

        jmp     nf23_0                  ; flush prefetch
        ALIGN 4
nf23_0:

        NF23_ROWPAIR nf23_11, nf23_12, nf23_13, nf23_14
        lea     edi, [edi+edx*2]

        NF23_ROWPAIR nf23_31, nf23_32, nf23_33, nf23_34
        lea     edi, [edi+edx*2]

        NF23_ROWPAIR nf23_51, nf23_52, nf23_53, nf23_54
        lea     edi, [edi+edx*2]

        NF23_ROWPAIR nf23_71, nf23_72, nf23_73, nf23_74
        add     edi, edx                ; edi now on the section's last row

        sub     edi, [nfpk_back_right]  ; back to the top row, next section
        add     esi, 6                  ; consume the 6 data bytes
        ret
|
|
|
|
;----------------------------------------
|
|
; nf8: 2x2 4x4x1 (24 bytes)
;   Data at esi: four 6-byte groups, one per 4x4 quadrant, each made of
;   two colour words and two mask bytes:
;     group 1: colours +0/+2,   masks +4/+5
;     group 2: colours +6/+8,   masks +10/+11
;     group 3: colours +12/+14, masks +16/+17
;     group 4: colours +18/+20, masks +22/+23
;   If bit 15 of the first colour word is set the block is handled by
;   nf24 instead.
;
;   Self-modifying: each mask byte indexes nfhpk_mov8 and the four bytes
;   found there are written over the ModRM bytes (label+1) of the four
;   stores covering two 4-pixel rows.
;
;   Quadrants are drawn in group order: 1 and 2 fill the left 8-byte
;   column top to bottom, 3 and 4 the right one.
;
;   Clobb: eax, ebx, ecx, edx, flags.  ebp is saved and restored.
;   Out:   esi advanced by 24; edi advanced to the next section.

; NF8_PATCH4 src_off, base, l1, l2, l3, l4
;   Patches the stores at l1..l4 from mask byte [esi+src_off].
;   Expects eax (upper 24 bits zero), ecx = nfhpk_mov8, edx = base+1.
%macro NF8_PATCH4 6
        mov     al, [esi+%1]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(%3-%2)], bl
        mov     [edx+(%4-%2)], bh
        shr     ebx, 16
        mov     [edx+(%5-%2)], bl
        mov     [edx+(%6-%2)], bh
%endmacro

; NF8_EXPAND
;   From ecx = second colour : first colour, derive the other three
;   register combinations used by the patched stores (ebx, edx, ebp).
%macro NF8_EXPAND 0
        mov     edx, ecx
        ror     edx, 16
        mov     ebx, edx
        mov     bx, cx
        mov     ebp, ecx
        mov     bp, dx
%endmacro

; NF8_ROWPAIR l1, l2, l3, l4
;   Two 4-pixel rows of patched stores with the stride step between
;   them (esi holds the stride).
%macro NF8_ROWPAIR 4
%1:     mov     [ebp+0], ebx
%2:     mov     [ebp+4], ebx
        add     edi, esi
%3:     mov     [ebp+0], ebx
%4:     mov     [ebp+4], ebx
%endmacro

        ALIGN 4
nf8:
        test    word [esi], 08000h
        jnz     near nf24

        xor     eax, eax

        lea     ecx, [nfhpk_mov8]
        lea     edx, [nf8_11+1]         ; +1: address of the first ModRM byte

        NF8_PATCH4  4, nf8_11, nf8_11, nf8_12, nf8_13, nf8_14
        NF8_PATCH4  5, nf8_11, nf8_21, nf8_22, nf8_23, nf8_24

        NF8_PATCH4 10, nf8_11, nf8_31, nf8_32, nf8_33, nf8_34
        NF8_PATCH4 11, nf8_11, nf8_41, nf8_42, nf8_43, nf8_44

        add     edx, nf8_51-nf8_11      ; rebase onto the third quadrant

        NF8_PATCH4 16, nf8_51, nf8_51, nf8_52, nf8_53, nf8_54
        NF8_PATCH4 17, nf8_51, nf8_61, nf8_62, nf8_63, nf8_64

        NF8_PATCH4 22, nf8_51, nf8_71, nf8_72, nf8_73, nf8_74
        NF8_PATCH4 23, nf8_51, nf8_81, nf8_82, nf8_83, nf8_84

        push    ebp
        push    esi

        ; Colour pairs for quadrants 4, 3 and 2 are pushed in that order
        ; so they pop in drawing order; quadrant 1 stays in ecx.
        ; (note that bits are read least significant first).
        Trans16 cx, esi+18+2
        shl     ecx, 16
        Trans16 cx, esi+18
        push    ecx

        Trans16 cx, esi+12+2
        shl     ecx, 16
        Trans16 cx, esi+12
        push    ecx

        Trans16 cx, esi+6+2
        shl     ecx, 16
        Trans16 cx, esi+6
        push    ecx

        Trans16 cx, esi+2
        shl     ecx, 16
        Trans16 cx, esi

        mov     esi, [nf_width]         ; esi = row stride from here on
        NF8_EXPAND

        jmp     nf8_0                   ; flush prefetch
        ALIGN 4
nf8_0:
        ; --- quadrant 1 ---
        NF8_ROWPAIR nf8_11, nf8_12, nf8_13, nf8_14
        add     edi, esi

        NF8_ROWPAIR nf8_21, nf8_22, nf8_23, nf8_24
        add     edi, esi

        ; --- quadrant 2 ---
        pop     ecx
        NF8_EXPAND

        NF8_ROWPAIR nf8_31, nf8_32, nf8_33, nf8_34
        add     edi, esi

        NF8_ROWPAIR nf8_41, nf8_42, nf8_43, nf8_44
        add     edi, esi

        lea     eax, [esi*8-8]          ; up eight rows, right eight bytes
        sub     edi, eax

        ; --- quadrant 3 ---
        pop     ecx
        NF8_EXPAND

        NF8_ROWPAIR nf8_51, nf8_52, nf8_53, nf8_54
        add     edi, esi

        NF8_ROWPAIR nf8_61, nf8_62, nf8_63, nf8_64
        add     edi, esi

        ; --- quadrant 4 ---
        pop     ecx
        NF8_EXPAND

        NF8_ROWPAIR nf8_71, nf8_72, nf8_73, nf8_74
        add     edi, esi

        NF8_ROWPAIR nf8_81, nf8_82, nf8_83, nf8_84

        pop     esi
        pop     ebp
        add     esi, 24                 ; consume the 24 data bytes
        sub     edi, 8                  ; undo the right-column offset
        sub     edi, [nfpk_back_right]  ; back to the top row, next section

        ret
|
|
|
|
;----------------------------------------
|
|
; nf24 (nf8+16): 2x1 4x8x1 (16 bytes)
;   Data at esi: two 8-byte groups, one per 4-pixel-wide, 8-row half,
;   each made of two colour words and four mask bytes:
;     left half : colours +0/+2,  masks +4..+7
;     right half: colours +8/+10, masks +12..+15
;   If bit 15 of the word at +8 is set the block is handled by nf40.
;
;   Self-modifying: each mask byte indexes nfhpk_mov8 and the four bytes
;   found there are written over the ModRM bytes (label+1) of the four
;   stores covering two 4-pixel rows.
;
;   Clobb: eax, ebx, ecx, edx, flags.  ebp is saved and restored.
;   Out:   esi advanced by 16; edi advanced to the next section.

; NF24_PATCH4 src_off, base, l1, l2, l3, l4
;   Patches the stores at l1..l4 from mask byte [esi+src_off].
;   Expects eax (upper 24 bits zero), ecx = nfhpk_mov8, edx = base+1.
%macro NF24_PATCH4 6
        mov     al, [esi+%1]
        mov     ebx, [ecx+eax*4]
        mov     [edx+(%3-%2)], bl
        mov     [edx+(%4-%2)], bh
        shr     ebx, 16
        mov     [edx+(%5-%2)], bl
        mov     [edx+(%6-%2)], bh
%endmacro

; NF24_EXPAND
;   From ecx = second colour : first colour, derive the other three
;   register combinations used by the patched stores (ebx, edx, ebp).
%macro NF24_EXPAND 0
        mov     edx, ecx
        ror     edx, 16
        mov     ebx, edx
        mov     bx, cx
        mov     ebp, ecx
        mov     bp, dx
%endmacro

; NF24_ROWPAIR l1, l2, l3, l4
;   Two 4-pixel rows of patched stores with the stride step between
;   them (esi holds the stride).
%macro NF24_ROWPAIR 4
%1:     mov     [ebp+0], ebx
%2:     mov     [ebp+4], ebx
        add     edi, esi
%3:     mov     [ebp+0], ebx
%4:     mov     [ebp+4], ebx
%endmacro

        ALIGN 4
;nf8+16
nf24:                                   ; 2x1 4x8x1 (16 bytes)

        test    word [esi+8], 08000h
        jnz     near nf40

        xor     eax, eax

        lea     ecx, [nfhpk_mov8]
        lea     edx, [nf24_11+1]        ; +1: address of the first ModRM byte

        NF24_PATCH4  4, nf24_11, nf24_11, nf24_12, nf24_13, nf24_14
        NF24_PATCH4  5, nf24_11, nf24_21, nf24_22, nf24_23, nf24_24

        NF24_PATCH4  6, nf24_11, nf24_31, nf24_32, nf24_33, nf24_34
        NF24_PATCH4  7, nf24_11, nf24_41, nf24_42, nf24_43, nf24_44

        add     edx, nf24_51-nf24_11    ; rebase onto the right half

        NF24_PATCH4 12, nf24_51, nf24_51, nf24_52, nf24_53, nf24_54
        NF24_PATCH4 13, nf24_51, nf24_61, nf24_62, nf24_63, nf24_64

        NF24_PATCH4 14, nf24_51, nf24_71, nf24_72, nf24_73, nf24_74
        NF24_PATCH4 15, nf24_51, nf24_81, nf24_82, nf24_83, nf24_84

        push    ebp
        push    esi

        ; The right half's colour pair is pushed for later; the left
        ; half's pair stays in ecx.
        ; (note that bits are read least significant first).
        Trans16 cx, esi+8+2
        shl     ecx, 16
        Trans16 cx, esi+8
        push    ecx

        Trans16 cx, esi+2
        shl     ecx, 16
        Trans16_3 cx, esi, 1

        mov     esi, [nf_width]         ; esi = row stride from here on
        NF24_EXPAND

        jmp     nf24_0                  ; flush prefetch
        ALIGN 4
nf24_0:

        ; --- left half, rows 0..7 ---
        NF24_ROWPAIR nf24_11, nf24_12, nf24_13, nf24_14
        add     edi, esi

        NF24_ROWPAIR nf24_21, nf24_22, nf24_23, nf24_24
        add     edi, esi

        NF24_ROWPAIR nf24_31, nf24_32, nf24_33, nf24_34
        add     edi, esi

        NF24_ROWPAIR nf24_41, nf24_42, nf24_43, nf24_44
        add     edi, esi

        lea     eax, [esi*8-8]          ; up eight rows, right eight bytes
        sub     edi, eax

        ; --- right half, rows 0..7 ---
        pop     ecx
        NF24_EXPAND

        NF24_ROWPAIR nf24_51, nf24_52, nf24_53, nf24_54
        add     edi, esi

        NF24_ROWPAIR nf24_61, nf24_62, nf24_63, nf24_64
        add     edi, esi

        NF24_ROWPAIR nf24_71, nf24_72, nf24_73, nf24_74
        add     edi, esi

        NF24_ROWPAIR nf24_81, nf24_82, nf24_83, nf24_84

        pop     esi
        pop     ebp
        add     esi, 16                 ; consume the 16 data bytes
        sub     edi, 8                  ; undo the right-half offset
        sub     edi, [nfpk_back_right]  ; back to the top row, next section

        ret
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf8+32
|
|
; nf40 -- block decoder for the encoding described below as
; "1x2 8x4x1 (16 bytes)".
;
; Implemented with self-modifying code: each data byte at [esi+4..7] and
; [esi+12..15] is used as an index into the dword table nfhpk_mov8, and
; the four bytes of the table entry are written over byte 1 of the
; placeholder instructions nf40_11..nf40_84 further down.  The
; placeholders ("mov [ebp+N], ebx") are not meant to run as written.
; NOTE(review): nfhpk_mov8 is defined outside this view; presumably its
; entries are ModRM bytes that turn each placeholder into a store to
; [edi+N] from one of ebx/edx/ecx/ebp -- confirm against the table.
;
; In:   esi = source data, edi = destination (as used by the code below).
; Out:  esi advanced by 16; edi advanced by 7*[nf_width] and then reduced
;       by [nfpk_back_right].
; Regs: ebp and esi are saved and restored; eax, ebx, ecx, edx are
;       overwritten.
nf40: ; 1x2 8x4x1 (16 bytes)
|
|
|
|
xor eax, eax
|
|
|
|
lea ecx, [nfhpk_mov8]
|
|
lea edx, [nf40_11+1] ; Removed byte ds: - AH
|
|
|
|
; Patch the placeholders for the first four rows from [esi+4..7];
; eax's upper 24 bits stay zero so al alone indexes the table.
mov al, [esi+4]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_11-nf40_11)], bl
|
|
mov [edx+(nf40_12-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_13-nf40_11)], bl
|
|
mov [edx+(nf40_14-nf40_11)], bh
|
|
|
|
mov al, [esi+5]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_21-nf40_11)], bl
|
|
mov [edx+(nf40_22-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_23-nf40_11)], bl
|
|
mov [edx+(nf40_24-nf40_11)], bh
|
|
|
|
|
|
mov al, [esi+6]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_31-nf40_11)], bl
|
|
mov [edx+(nf40_32-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_33-nf40_11)], bl
|
|
mov [edx+(nf40_34-nf40_11)], bh
|
|
|
|
mov al, [esi+7]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_41-nf40_11)], bl
|
|
mov [edx+(nf40_42-nf40_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_43-nf40_11)], bl
|
|
mov [edx+(nf40_44-nf40_11)], bh
|
|
|
|
; Re-base edx on nf40_51 so the offsets for the last four rows are
; taken relative to that label.
add edx, nf40_51-nf40_11
|
|
|
|
; Patch the placeholders for the last four rows from [esi+12..15].
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_51-nf40_51)], bl
|
|
mov [edx+(nf40_52-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_53-nf40_51)], bl
|
|
mov [edx+(nf40_54-nf40_51)], bh
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_61-nf40_51)], bl
|
|
mov [edx+(nf40_62-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_63-nf40_51)], bl
|
|
mov [edx+(nf40_64-nf40_51)], bh
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_71-nf40_51)], bl
|
|
mov [edx+(nf40_72-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_73-nf40_51)], bl
|
|
mov [edx+(nf40_74-nf40_51)], bh
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf40_81-nf40_51)], bl
|
|
mov [edx+(nf40_82-nf40_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf40_83-nf40_51)], bl
|
|
mov [edx+(nf40_84-nf40_51)], bh
|
|
|
|
|
|
push ebp
|
|
push esi
|
|
; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations
|
|
; (note that bits are read least significant first).
|
|
|
|
; Trans16 / Trans16_3 are macros defined outside this view; here they
; load a 16-bit color into cx from the given address.
; NOTE(review): the third argument of Trans16_3 presumably masks the
; flag bit in the color's high byte -- confirm in the macro definition.
; The color pair at [esi+8] / [esi+8+2] is packed into ecx and parked on
; the stack for the last four rows.
Trans16 cx, esi+8+2
|
|
shl ecx, 16
|
|
Trans16_3 cx, esi+8, 1
|
|
push ecx
|
|
|
|
; Color pair at [esi] / [esi+2] for the first four rows.
Trans16 cx, esi+2
|
|
shl ecx, 16
|
|
Trans16_3 cx, esi, 1
|
|
|
|
; esi now holds the row stride; the source pointer is on the stack.
mov esi,[nf_width]
|
|
; edx = ecx with its halves swapped
mov edx, ecx
|
|
ror edx, 16
|
|
; ebx = low word of ecx in both halves
mov ebx, edx
|
|
mov bx, cx
|
|
; ebp = high word of ecx in both halves
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
jmp nf40_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf40_0:
|
|
|
|
; Patched stores: four dwords per row, then step edi by one row.
nf40_11:mov [ebp+0], ebx
|
|
nf40_12:mov [ebp+4], ebx
|
|
nf40_13:mov [ebp+8], ebx
|
|
nf40_14:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_21:mov [ebp+0], ebx
|
|
nf40_22:mov [ebp+4], ebx
|
|
nf40_23:mov [ebp+8], ebx
|
|
nf40_24:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_31:mov [ebp+0], ebx
|
|
nf40_32:mov [ebp+4], ebx
|
|
nf40_33:mov [ebp+8], ebx
|
|
nf40_34:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_41:mov [ebp+0], ebx
|
|
nf40_42:mov [ebp+4], ebx
|
|
nf40_43:mov [ebp+8], ebx
|
|
nf40_44:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
; Recover the second color pair and rebuild the four combinations.
pop ecx
|
|
|
|
mov edx, ecx
|
|
ror edx, 16
|
|
mov ebx, edx
|
|
mov bx, cx
|
|
mov ebp, ecx
|
|
mov bp, dx
|
|
|
|
nf40_51:mov [ebp+0], ebx
|
|
nf40_52:mov [ebp+4], ebx
|
|
nf40_53:mov [ebp+8], ebx
|
|
nf40_54:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_61:mov [ebp+0], ebx
|
|
nf40_62:mov [ebp+4], ebx
|
|
nf40_63:mov [ebp+8], ebx
|
|
nf40_64:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_71:mov [ebp+0], ebx
|
|
nf40_72:mov [ebp+4], ebx
|
|
nf40_73:mov [ebp+8], ebx
|
|
nf40_74:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf40_81:mov [ebp+0], ebx
|
|
nf40_82:mov [ebp+4], ebx
|
|
nf40_83:mov [ebp+8], ebx
|
|
nf40_84:mov [ebp+12], ebx
|
|
|
|
; Restore the source pointer and ebp, consume the 16 input bytes, and
; reposition edi using nfpk_back_right.
pop esi
|
|
pop ebp
|
|
add esi, 16
|
|
sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf9 -- block decoder for the encoding described below as
; "8x8x2 (24 bytes)", plus dispatch to the nf41 / nf25 variants.
;
; Dispatch: if bit 15 of the word at [esi] is set, jump to nf41;
; otherwise, if bit 15 of the word at [esi+4] is set, jump to nf25.
;
; Otherwise self-modifying code is used: each of the 16 data bytes at
; [esi+8..23] indexes the dword table nfhpk_mov4, and the four bytes of
; the entry are written over byte 2 of the placeholder "mov ax, bx"
; instructions nf9_11..nf9_88.  Within each pair of placeholders the
; first (odd-numbered) one receives bh and the second receives bl.
; NOTE(review): nfhpk_mov4 is defined outside this view; presumably its
; entries are ModRM bytes selecting bx/dx/cx/bp as the source register
; -- confirm against the table.
;
; In:   esi = source data, edi = destination (as used by the code below).
; Out:  esi advanced by 24; edi advanced by 7*[nf_width] and then reduced
;       by [nfpk_back_right].
; Regs: ebp and esi are saved and restored; eax, ebx, ecx, edx are
;       overwritten.
nf9: ; 8x8x2 (24 bytes)
|
|
|
|
test word [esi], 08000h
|
|
jnz near nf41
|
|
|
|
test word [esi+4], 08000h
|
|
jnz near nf25
|
|
|
|
xor eax, eax
|
|
|
|
lea ecx, [nfhpk_mov4]
|
|
lea edx, [nf9_11+2] ; Removed byte ds: - AH
|
|
|
|
; Patch placeholders nf9_11..nf9_48 from data bytes [esi+8..15]
; (two data bytes per group of eight placeholders).
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_11-nf9_11)], bh
|
|
mov [edx+(nf9_12-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_13-nf9_11)], bh
|
|
mov [edx+(nf9_14-nf9_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_15-nf9_11)], bh
|
|
mov [edx+(nf9_16-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_17-nf9_11)], bh
|
|
mov [edx+(nf9_18-nf9_11)], bl
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_21-nf9_11)], bh
|
|
mov [edx+(nf9_22-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_23-nf9_11)], bh
|
|
mov [edx+(nf9_24-nf9_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_25-nf9_11)], bh
|
|
mov [edx+(nf9_26-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_27-nf9_11)], bh
|
|
mov [edx+(nf9_28-nf9_11)], bl
|
|
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_31-nf9_11)], bh
|
|
mov [edx+(nf9_32-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_33-nf9_11)], bh
|
|
mov [edx+(nf9_34-nf9_11)], bl
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_35-nf9_11)], bh
|
|
mov [edx+(nf9_36-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_37-nf9_11)], bh
|
|
mov [edx+(nf9_38-nf9_11)], bl
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_41-nf9_11)], bh
|
|
mov [edx+(nf9_42-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_43-nf9_11)], bh
|
|
mov [edx+(nf9_44-nf9_11)], bl
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_45-nf9_11)], bh
|
|
mov [edx+(nf9_46-nf9_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_47-nf9_11)], bh
|
|
mov [edx+(nf9_48-nf9_11)], bl
|
|
|
|
|
|
; Re-base edx on nf9_51 so the remaining offsets are taken relative to
; that label.
lea edx, [edx+(nf9_51-nf9_11)]
|
|
|
|
; Patch placeholders nf9_51..nf9_88 from data bytes [esi+16..23].
mov al, [esi+16]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_51-nf9_51)], bh
|
|
mov [edx+(nf9_52-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_53-nf9_51)], bh
|
|
mov [edx+(nf9_54-nf9_51)], bl
|
|
|
|
mov al, [esi+17]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_55-nf9_51)], bh
|
|
mov [edx+(nf9_56-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_57-nf9_51)], bh
|
|
mov [edx+(nf9_58-nf9_51)], bl
|
|
|
|
|
|
mov al, [esi+18]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_61-nf9_51)], bh
|
|
mov [edx+(nf9_62-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_63-nf9_51)], bh
|
|
mov [edx+(nf9_64-nf9_51)], bl
|
|
|
|
mov al, [esi+19]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_65-nf9_51)], bh
|
|
mov [edx+(nf9_66-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_67-nf9_51)], bh
|
|
mov [edx+(nf9_68-nf9_51)], bl
|
|
|
|
|
|
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_71-nf9_51)], bh
|
|
mov [edx+(nf9_72-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_73-nf9_51)], bh
|
|
mov [edx+(nf9_74-nf9_51)], bl
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_75-nf9_51)], bh
|
|
mov [edx+(nf9_76-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_77-nf9_51)], bh
|
|
mov [edx+(nf9_78-nf9_51)], bl
|
|
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_81-nf9_51)], bh
|
|
mov [edx+(nf9_82-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_83-nf9_51)], bh
|
|
mov [edx+(nf9_84-nf9_51)], bl
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf9_85-nf9_51)], bh
|
|
mov [edx+(nf9_86-nf9_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf9_87-nf9_51)], bh
|
|
mov [edx+(nf9_88-nf9_51)], bl
|
|
|
|
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
|
|
|
|
; Trans16 is a macro defined outside this view; here it loads a 16-bit
; color into the named register from the given address.
Trans16 bx, esi
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4
|
|
Trans16 bp, esi+6
|
|
|
|
; esi now holds the row stride; the source pointer is on the stack.
mov esi, [nf_width]
|
|
|
|
jmp nf9_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf9_0:
|
|
|
|
; Each pair of patched moves builds one dword in eax: the first move is
; shifted into the high word, the second fills the low word.  Four
; dwords are stored per row, then edi steps to the next row.
nf9_11: mov ax, bx
|
|
shl eax, 16
|
|
nf9_12: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_13: mov ax, bx
|
|
shl eax, 16
|
|
nf9_14: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_15: mov ax, bx
|
|
shl eax, 16
|
|
nf9_16: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_17: mov ax, bx
|
|
shl eax, 16
|
|
nf9_18: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_21: mov ax, bx
|
|
shl eax, 16
|
|
nf9_22: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_23: mov ax, bx
|
|
shl eax, 16
|
|
nf9_24: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_25: mov ax, bx
|
|
shl eax, 16
|
|
nf9_26: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_27: mov ax, bx
|
|
shl eax, 16
|
|
nf9_28: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_31: mov ax, bx
|
|
shl eax, 16
|
|
nf9_32: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_33: mov ax, bx
|
|
shl eax, 16
|
|
nf9_34: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_35: mov ax, bx
|
|
shl eax, 16
|
|
nf9_36: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_37: mov ax, bx
|
|
shl eax, 16
|
|
nf9_38: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_41: mov ax, bx
|
|
shl eax, 16
|
|
nf9_42: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_43: mov ax, bx
|
|
shl eax, 16
|
|
nf9_44: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_45: mov ax, bx
|
|
shl eax, 16
|
|
nf9_46: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_47: mov ax, bx
|
|
shl eax, 16
|
|
nf9_48: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_51: mov ax, bx
|
|
shl eax, 16
|
|
nf9_52: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_53: mov ax, bx
|
|
shl eax, 16
|
|
nf9_54: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_55: mov ax, bx
|
|
shl eax, 16
|
|
nf9_56: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_57: mov ax, bx
|
|
shl eax, 16
|
|
nf9_58: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_61: mov ax, bx
|
|
shl eax, 16
|
|
nf9_62: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_63: mov ax, bx
|
|
shl eax, 16
|
|
nf9_64: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_65: mov ax, bx
|
|
shl eax, 16
|
|
nf9_66: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_67: mov ax, bx
|
|
shl eax, 16
|
|
nf9_68: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_71: mov ax, bx
|
|
shl eax, 16
|
|
nf9_72: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_73: mov ax, bx
|
|
shl eax, 16
|
|
nf9_74: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_75: mov ax, bx
|
|
shl eax, 16
|
|
nf9_76: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_77: mov ax, bx
|
|
shl eax, 16
|
|
nf9_78: mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf9_81: mov ax, bx
|
|
shl eax, 16
|
|
nf9_82: mov ax, bx
|
|
mov [edi], eax
|
|
nf9_83: mov ax, bx
|
|
shl eax, 16
|
|
nf9_84: mov ax, bx
|
|
mov [edi+4], eax
|
|
nf9_85: mov ax, bx
|
|
shl eax, 16
|
|
nf9_86: mov ax, bx
|
|
mov [edi+8], eax
|
|
nf9_87: mov ax, bx
|
|
shl eax, 16
|
|
nf9_88: mov ax, bx
|
|
mov [edi+12], eax
|
|
|
|
; Restore the source pointer and ebp, consume the 24 input bytes, and
; reposition edi using nfpk_back_right.
pop esi
|
|
pop ebp
|
|
add esi, 24
|
|
sub edi, [nfpk_back_right] ; br
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+16
|
|
; nf25 -- block decoder for the encoding described below as
; "low 4x4x2 (12 bytes)".  Reached from nf9 when bit 15 of the word at
; [esi] is clear and bit 15 of the word at [esi+4] is set.
;
; Self-modifying code: each of the four data bytes at [esi+8..11]
; indexes the dword table nfhpk_mov4, and the four bytes of the entry
; are written over byte 1 of the placeholder "mov eax, ebx" instructions
; nf25_11..nf25_44.
; NOTE(review): nfhpk_mov4 is defined outside this view; presumably its
; entries are ModRM bytes selecting ebx/edx/ecx/ebp as the source
; register -- confirm against the table.
;
; Every selected dword is stored twice, at [edi+k] and [edi+esi+k], so
; each patched group fills two destination rows.
;
; In:   esi = source data, edi = destination (as used by the code below).
; Out:  esi advanced by 12; edi advanced by 7*[nf_width] and then reduced
;       by [nfpk_back_right].
; Regs: ebp and esi are saved and restored; eax, ebx, ecx, edx are
;       overwritten.
nf25: ; low 4x4x2 (12 bytes)
|
|
|
|
xor eax, eax
|
|
|
|
lea ecx, [nfhpk_mov4]
|
|
lea edx, [nf25_11+1] ; Removed byte ds: - AH
|
|
|
|
; Patch the 16 placeholders, four per data byte.
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_11-nf25_11)], bl
|
|
mov [edx+(nf25_12-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_13-nf25_11)], bl
|
|
mov [edx+(nf25_14-nf25_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_21-nf25_11)], bl
|
|
mov [edx+(nf25_22-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_23-nf25_11)], bl
|
|
mov [edx+(nf25_24-nf25_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_31-nf25_11)], bl
|
|
mov [edx+(nf25_32-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_33-nf25_11)], bl
|
|
mov [edx+(nf25_34-nf25_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf25_41-nf25_11)], bl
|
|
mov [edx+(nf25_42-nf25_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf25_43-nf25_11)], bl
|
|
mov [edx+(nf25_44-nf25_11)], bh
|
|
|
|
push ebp
|
|
push esi
|
|
; Load ebx,edx,ecx,ebp with four colors, duplicated in high order.
|
|
|
|
; Trans16 / Trans16_3 are macros defined outside this view; here they
; load a 16-bit color into cx.  "shrd r, ecx, 16" followed by
; "mov r16, cx" leaves that color in both halves of r.
; NOTE(review): the third argument of Trans16_3 presumably masks the
; flag bit tested by nf9 at [esi+4] -- confirm in the macro definition.
Trans16 cx, esi
|
|
shrd ebx, ecx, 16
|
|
mov bx, cx
|
|
Trans16 cx, esi+2
|
|
shrd edx, ecx, 16
|
|
mov dx, cx
|
|
Trans16_3 cx, esi+4, 1
|
|
shrd eax, ecx, 16
|
|
mov ax, cx
|
|
; The third color is built in eax and parked on the stack while cx is
; reused for the fourth; it is popped into ecx afterwards.
push eax
|
|
Trans16 cx, esi+6
|
|
shrd ebp, ecx, 16
|
|
mov bp, cx
|
|
pop ecx
|
|
|
|
; esi now holds the row stride; the source pointer is on the stack.
mov esi, [nf_width]
|
|
|
|
jmp nf25_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf25_0:
|
|
|
|
; Each patched move selects one dword, written to the current row and
; the row below; edi then advances by two rows.
nf25_11:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf25_12:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf25_13:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf25_14:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf25_21:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf25_22:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf25_23:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf25_24:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf25_31:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf25_32:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf25_33:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf25_34:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf25_41:mov eax, ebx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf25_42:mov eax, ebx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf25_43:mov eax, ebx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf25_44:mov eax, ebx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
|
|
; Only one row step here, so edi ends 7 rows below where it started.
add edi, esi
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 12
|
|
sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+32
|
|
; nf41 -- block decoder for the encoding described below as
; "low 4x8x2 (16 bytes)".  Reached from nf9 when bit 15 of the word at
; [esi] is set.  If bit 15 of the word at [esi+4] is also set, control
; passes on to nf57.
;
; Self-modifying code: each of the eight data bytes at [esi+8..15]
; indexes the dword table nfhpk_mov8, and the four bytes of the entry
; are written over byte 1 of the placeholder "mov [ebp+N], ebx"
; instructions nf41_11..nf41_84.
; NOTE(review): nfhpk_mov8 is defined outside this view; presumably its
; entries are ModRM bytes that turn each placeholder into a store to
; [edi+N] from one of ebx/edx/ecx/ebp -- confirm against the table.
;
; In:   esi = source data, edi = destination (as used by the code below).
; Out:  esi advanced by 16; edi advanced by 7*[nf_width] and then reduced
;       by [nfpk_back_right].
; Regs: ebp and esi are saved and restored; eax, ebx, ecx, edx are
;       overwritten.
nf41: ; low 4x8x2 (16 bytes)
|
|
test word [esi+4], 08000h
|
|
jnz near nf57
|
|
|
|
xor eax, eax
|
|
|
|
lea ecx, [nfhpk_mov8]
|
|
lea edx, [nf41_11+1] ; Removed byte ds: - AH
|
|
|
|
; Patch placeholders nf41_11..nf41_44 from data bytes [esi+8..11].
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_11-nf41_11)], bl
|
|
mov [edx+(nf41_12-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_13-nf41_11)], bl
|
|
mov [edx+(nf41_14-nf41_11)], bh
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_21-nf41_11)], bl
|
|
mov [edx+(nf41_22-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_23-nf41_11)], bl
|
|
mov [edx+(nf41_24-nf41_11)], bh
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_31-nf41_11)], bl
|
|
mov [edx+(nf41_32-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_33-nf41_11)], bl
|
|
mov [edx+(nf41_34-nf41_11)], bh
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_41-nf41_11)], bl
|
|
mov [edx+(nf41_42-nf41_11)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_43-nf41_11)], bl
|
|
mov [edx+(nf41_44-nf41_11)], bh
|
|
|
|
; Re-base edx on nf41_51 so the remaining offsets are taken relative to
; that label.
lea edx, [edx+(nf41_51-nf41_11)]
|
|
|
|
; Patch placeholders nf41_51..nf41_84 from data bytes [esi+12..15].
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_51-nf41_51)], bl
|
|
mov [edx+(nf41_52-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_53-nf41_51)], bl
|
|
mov [edx+(nf41_54-nf41_51)], bh
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_61-nf41_51)], bl
|
|
mov [edx+(nf41_62-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_63-nf41_51)], bl
|
|
mov [edx+(nf41_64-nf41_51)], bh
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_71-nf41_51)], bl
|
|
mov [edx+(nf41_72-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_73-nf41_51)], bl
|
|
mov [edx+(nf41_74-nf41_51)], bh
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf41_81-nf41_51)], bl
|
|
mov [edx+(nf41_82-nf41_51)], bh
|
|
shr ebx, 16
|
|
mov [edx+(nf41_83-nf41_51)], bl
|
|
mov [edx+(nf41_84-nf41_51)], bh
|
|
|
|
push ebp
|
|
push esi
|
|
; Load ebx,edx,ecx,ebp with four colors, duplicated in high order.
|
|
|
|
; Trans16 / Trans16_3 are macros defined outside this view; here they
; load a 16-bit color into cx.  "shrd r, ecx, 16" followed by
; "mov r16, cx" leaves that color in both halves of r.
; NOTE(review): the third argument of Trans16_3 presumably masks the
; flag bit tested by nf9 at [esi] -- confirm in the macro definition.
Trans16_3 cx, esi, 1
|
|
shrd ebx, ecx, 16
|
|
mov bx, cx
|
|
Trans16 cx, esi+2
|
|
shrd edx, ecx, 16
|
|
mov dx, cx
|
|
Trans16 cx, esi+4
|
|
shrd eax, ecx, 16
|
|
mov ax, cx
|
|
; The third color is built in eax and parked on the stack while cx is
; reused for the fourth; it is popped into ecx afterwards.
push eax
|
|
Trans16 cx, esi+6
|
|
shrd ebp, ecx, 16
|
|
mov bp, cx
|
|
pop ecx
|
|
|
|
; esi now holds the row stride; the source pointer is on the stack.
mov esi, [nf_width]
|
|
|
|
jmp nf41_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf41_0:
|
|
|
|
; Patched stores: four dwords per row, then step edi by one row.
nf41_11:mov [ebp+0], ebx
|
|
nf41_12:mov [ebp+4], ebx
|
|
nf41_13:mov [ebp+8], ebx
|
|
nf41_14:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_21:mov [ebp+0], ebx
|
|
nf41_22:mov [ebp+4], ebx
|
|
nf41_23:mov [ebp+8], ebx
|
|
nf41_24:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_31:mov [ebp+0], ebx
|
|
nf41_32:mov [ebp+4], ebx
|
|
nf41_33:mov [ebp+8], ebx
|
|
nf41_34:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_41:mov [ebp+0], ebx
|
|
nf41_42:mov [ebp+4], ebx
|
|
nf41_43:mov [ebp+8], ebx
|
|
nf41_44:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_51:mov [ebp+0], ebx
|
|
nf41_52:mov [ebp+4], ebx
|
|
nf41_53:mov [ebp+8], ebx
|
|
nf41_54:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_61:mov [ebp+0], ebx
|
|
nf41_62:mov [ebp+4], ebx
|
|
nf41_63:mov [ebp+8], ebx
|
|
nf41_64:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_71:mov [ebp+0], ebx
|
|
nf41_72:mov [ebp+4], ebx
|
|
nf41_73:mov [ebp+8], ebx
|
|
nf41_74:mov [ebp+12], ebx
|
|
add edi, esi
|
|
|
|
nf41_81:mov [ebp+0], ebx
|
|
nf41_82:mov [ebp+4], ebx
|
|
nf41_83:mov [ebp+8], ebx
|
|
nf41_84:mov [ebp+12], ebx
|
|
|
|
; Restore the source pointer and ebp, consume the 16 input bytes, and
; reposition edi using nfpk_back_right.
pop esi
|
|
pop ebp
|
|
add esi, 16
|
|
sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf9+48
|
|
; nf57 -- block decoder for the encoding described below as
; "low 8x4x2 (16 bytes)".  Reached from nf41 when bit 15 is set in both
; the word at [esi] and the word at [esi+4].
;
; Self-modifying code: each of the eight data bytes at [esi+8..15]
; indexes the dword table nfhpk_mov4, and the four bytes of the entry
; are written over byte 2 of the placeholder "mov ax, bx" instructions
; nf57_11..nf57_48.  Within each pair of placeholders the first
; (odd-numbered) one receives bh and the second receives bl.
; NOTE(review): nfhpk_mov4 is defined outside this view; presumably its
; entries are ModRM bytes selecting bx/dx/cx/bp as the source register
; -- confirm against the table.
;
; Every assembled dword is stored twice, at [edi+k] and [edi+esi+k], so
; each group of eight placeholders fills two destination rows.
;
; In:   esi = source data, edi = destination (as used by the code below).
; Out:  esi advanced by 16; edi advanced by 7*[nf_width] and then reduced
;       by [nfpk_back_right].
; Regs: ebp and esi are saved and restored; eax, ebx, ecx, edx are
;       overwritten.
nf57: ; low 8x4x2 (16 bytes)
|
|
|
|
xor eax, eax
|
|
|
|
lea ecx, [nfhpk_mov4]
|
|
lea edx, [nf57_11+2] ; Removed byte ds: - AH
|
|
|
|
; Patch the 32 placeholders, four per data byte.
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_11-nf57_11)], bh
|
|
mov [edx+(nf57_12-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_13-nf57_11)], bh
|
|
mov [edx+(nf57_14-nf57_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_15-nf57_11)], bh
|
|
mov [edx+(nf57_16-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_17-nf57_11)], bh
|
|
mov [edx+(nf57_18-nf57_11)], bl
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_21-nf57_11)], bh
|
|
mov [edx+(nf57_22-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_23-nf57_11)], bh
|
|
mov [edx+(nf57_24-nf57_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_25-nf57_11)], bh
|
|
mov [edx+(nf57_26-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_27-nf57_11)], bh
|
|
mov [edx+(nf57_28-nf57_11)], bl
|
|
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_31-nf57_11)], bh
|
|
mov [edx+(nf57_32-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_33-nf57_11)], bh
|
|
mov [edx+(nf57_34-nf57_11)], bl
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_35-nf57_11)], bh
|
|
mov [edx+(nf57_36-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_37-nf57_11)], bh
|
|
mov [edx+(nf57_38-nf57_11)], bl
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_41-nf57_11)], bh
|
|
mov [edx+(nf57_42-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_43-nf57_11)], bh
|
|
mov [edx+(nf57_44-nf57_11)], bl
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf57_45-nf57_11)], bh
|
|
mov [edx+(nf57_46-nf57_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf57_47-nf57_11)], bh
|
|
mov [edx+(nf57_48-nf57_11)], bl
|
|
|
|
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
|
|
|
|
; Trans16 / Trans16_3 are macros defined outside this view; here they
; load a 16-bit color into the named register from the given address.
; NOTE(review): the third argument of Trans16_3 presumably masks the
; flag bits tested at [esi] and [esi+4] -- confirm in the macro
; definition.
Trans16_3 bx, esi, 1
|
|
Trans16 dx, esi+2
|
|
Trans16_3 cx, esi+4, 1
|
|
Trans16 bp, esi+6
|
|
|
|
; esi now holds the row stride; the source pointer is on the stack.
mov esi, [nf_width]
|
|
|
|
jmp nf57_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf57_0:
|
|
|
|
; Each pair of patched moves builds one dword in eax (first move ends
; up in the high word), which is written to the current row and the row
; below; edi then advances by two rows.
nf57_11:mov ax, bx
|
|
shl eax, 16
|
|
nf57_12:mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf57_13:mov ax, bx
|
|
shl eax, 16
|
|
nf57_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf57_15:mov ax, bx
|
|
shl eax, 16
|
|
nf57_16:mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf57_17:mov ax, bx
|
|
shl eax, 16
|
|
nf57_18:mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf57_21:mov ax, bx
|
|
shl eax, 16
|
|
nf57_22:mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf57_23:mov ax, bx
|
|
shl eax, 16
|
|
nf57_24:mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf57_25:mov ax, bx
|
|
shl eax, 16
|
|
nf57_26:mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf57_27:mov ax, bx
|
|
shl eax, 16
|
|
nf57_28:mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf57_31:mov ax, bx
|
|
shl eax, 16
|
|
nf57_32:mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf57_33:mov ax, bx
|
|
shl eax, 16
|
|
nf57_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf57_35:mov ax, bx
|
|
shl eax, 16
|
|
nf57_36:mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf57_37:mov ax, bx
|
|
shl eax, 16
|
|
nf57_38:mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
lea edi, [edi+esi*2]
|
|
|
|
nf57_41:mov ax, bx
|
|
shl eax, 16
|
|
nf57_42:mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+esi], eax
|
|
nf57_43:mov ax, bx
|
|
shl eax, 16
|
|
nf57_44:mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+esi+4], eax
|
|
nf57_45:mov ax, bx
|
|
shl eax, 16
|
|
nf57_46:mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+esi+8], eax
|
|
nf57_47:mov ax, bx
|
|
shl eax, 16
|
|
nf57_48:mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+esi+12], eax
|
|
; Only one row step here, so edi ends 7 rows below where it started.
add edi, esi
|
|
|
|
pop esi
|
|
pop ebp
|
|
add esi, 16
|
|
sub edi, [nfpk_back_right] ;br
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf10 -- block decoder for the encoding described below as
; "2x2 4x4x2 (48 bytes)", plus dispatch to nf26.
;
; Dispatch: if bit 15 of the word at [esi] is set, jump to nf26.
;
; Otherwise the 48 input bytes are consumed as four 12-byte groups, each
; made of four 16-bit colors followed by four data bytes:
;   colors [esi+0..7]   data [esi+8..11]   -> placeholders nf10_11..nf10_28
;   colors [esi+12..19] data [esi+20..23]  -> placeholders nf10_31..nf10_48
;   colors [esi+24..31] data [esi+32..35]  -> placeholders nf10_51..nf10_68
;   colors [esi+36..43] data [esi+44..47]  -> placeholders nf10_71..nf10_88
;
; Self-modifying code: each data byte indexes the dword table
; nfhpk_mov4, and the four bytes of the entry are written over byte 2 of
; the placeholder "mov ax, bx" instructions.  Within each pair of
; placeholders the first (odd-numbered) one receives bh and the second
; receives bl.
; NOTE(review): nfhpk_mov4 is defined outside this view; presumably its
; entries are ModRM bytes selecting bx/dx/cx/bp as the source register
; -- confirm against the table.
;
; Drawing order: groups 1 and 2 fill eight rows at [edi+0..7]; edi is
; then moved back up eight rows and right by 8 bytes, and groups 3 and 4
; fill eight rows there.
;
; In:   esi = source data, edi = destination (as used by the code below).
; Out:  esi advanced by 48; edi ends at start + 7*[nf_width], then
;       reduced by [nfpk_back_right].
; Regs: ebp and esi are saved and restored; eax, ebx, ecx, edx are
;       overwritten.
nf10: ; 2x2 4x4x2 (48 bytes)
|
|
|
|
test word [esi], 08000h
|
|
jnz near nf26
|
|
|
|
xor eax, eax
|
|
|
|
lea ecx, [nfhpk_mov4]
|
|
lea edx, [nf10_11+2] ; Removed byte ds: - AH
|
|
|
|
; Group 1 data bytes -> nf10_11..nf10_28.
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_11-nf10_11)], bh
|
|
mov [edx+(nf10_12-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_13-nf10_11)], bh
|
|
mov [edx+(nf10_14-nf10_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_15-nf10_11)], bh
|
|
mov [edx+(nf10_16-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_17-nf10_11)], bh
|
|
mov [edx+(nf10_18-nf10_11)], bl
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_21-nf10_11)], bh
|
|
mov [edx+(nf10_22-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_23-nf10_11)], bh
|
|
mov [edx+(nf10_24-nf10_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_25-nf10_11)], bh
|
|
mov [edx+(nf10_26-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_27-nf10_11)], bh
|
|
mov [edx+(nf10_28-nf10_11)], bl
|
|
|
|
|
|
; Group 2 data bytes -> nf10_31..nf10_48.
mov al, [esi+20]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_31-nf10_11)], bh
|
|
mov [edx+(nf10_32-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_33-nf10_11)], bh
|
|
mov [edx+(nf10_34-nf10_11)], bl
|
|
|
|
mov al, [esi+21]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_35-nf10_11)], bh
|
|
mov [edx+(nf10_36-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_37-nf10_11)], bh
|
|
mov [edx+(nf10_38-nf10_11)], bl
|
|
|
|
|
|
mov al, [esi+22]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_41-nf10_11)], bh
|
|
mov [edx+(nf10_42-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_43-nf10_11)], bh
|
|
mov [edx+(nf10_44-nf10_11)], bl
|
|
|
|
mov al, [esi+23]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_45-nf10_11)], bh
|
|
mov [edx+(nf10_46-nf10_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_47-nf10_11)], bh
|
|
mov [edx+(nf10_48-nf10_11)], bl
|
|
|
|
|
|
; Re-base edx on nf10_51 so the remaining offsets are taken relative to
; that label.
lea edx, [edx+(nf10_51-nf10_11)]
|
|
|
|
; Group 3 data bytes -> nf10_51..nf10_68.
mov al, [esi+32]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_51-nf10_51)], bh
|
|
mov [edx+(nf10_52-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_53-nf10_51)], bh
|
|
mov [edx+(nf10_54-nf10_51)], bl
|
|
|
|
mov al, [esi+33]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_55-nf10_51)], bh
|
|
mov [edx+(nf10_56-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_57-nf10_51)], bh
|
|
mov [edx+(nf10_58-nf10_51)], bl
|
|
|
|
|
|
mov al, [esi+34]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_61-nf10_51)], bh
|
|
mov [edx+(nf10_62-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_63-nf10_51)], bh
|
|
mov [edx+(nf10_64-nf10_51)], bl
|
|
|
|
mov al, [esi+35]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_65-nf10_51)], bh
|
|
mov [edx+(nf10_66-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_67-nf10_51)], bh
|
|
mov [edx+(nf10_68-nf10_51)], bl
|
|
|
|
|
|
; Group 4 data bytes -> nf10_71..nf10_88.
mov al, [esi+44]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_71-nf10_51)], bh
|
|
mov [edx+(nf10_72-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_73-nf10_51)], bh
|
|
mov [edx+(nf10_74-nf10_51)], bl
|
|
|
|
mov al, [esi+45]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_75-nf10_51)], bh
|
|
mov [edx+(nf10_76-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_77-nf10_51)], bh
|
|
mov [edx+(nf10_78-nf10_51)], bl
|
|
|
|
|
|
mov al, [esi+46]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_81-nf10_51)], bh
|
|
mov [edx+(nf10_82-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_83-nf10_51)], bh
|
|
mov [edx+(nf10_84-nf10_51)], bl
|
|
|
|
mov al, [esi+47]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf10_85-nf10_51)], bh
|
|
mov [edx+(nf10_86-nf10_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf10_87-nf10_51)], bh
|
|
mov [edx+(nf10_88-nf10_51)], bl
|
|
|
|
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
|
|
|
|
; Trans16 is a macro defined outside this view; here it loads a 16-bit
; color into the named register from the given address.
Trans16 bx, esi
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4
|
|
Trans16 bp, esi+6
|
|
|
|
; esi now holds the row stride; the source pointer stays at [esp].
mov esi, [nf_width]
|
|
|
|
jmp nf10_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf10_0:
|
|
|
|
; Group 1: each pair of patched moves builds one dword in eax (first
; move ends up in the high word); two dwords per row, four rows.
nf10_11:mov ax, bx
|
|
shl eax, 16
|
|
nf10_12:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_13:mov ax, bx
|
|
shl eax, 16
|
|
nf10_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_15:mov ax, bx
|
|
shl eax, 16
|
|
nf10_16:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_17:mov ax, bx
|
|
shl eax, 16
|
|
nf10_18:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_21:mov ax, bx
|
|
shl eax, 16
|
|
nf10_22:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_23:mov ax, bx
|
|
shl eax, 16
|
|
nf10_24:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_25:mov ax, bx
|
|
shl eax, 16
|
|
nf10_26:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_27:mov ax, bx
|
|
shl eax, 16
|
|
nf10_28:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; Load bx,dx,cx,bp with four colors
|
|
|
|
; Group 2 colors: reload the saved source pointer from the top of the
; stack, fetch the colors, then put the row stride back in esi.
mov esi, [esp]
|
|
Trans16 bx, esi+12
|
|
Trans16 dx, esi+14
|
|
Trans16 cx, esi+16
|
|
Trans16 bp, esi+18
|
|
mov esi, [nf_width]
|
|
|
|
nf10_31:mov ax, bx
|
|
shl eax, 16
|
|
nf10_32:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_33:mov ax, bx
|
|
shl eax, 16
|
|
nf10_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_35:mov ax, bx
|
|
shl eax, 16
|
|
nf10_36:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_37:mov ax, bx
|
|
shl eax, 16
|
|
nf10_38:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_41:mov ax, bx
|
|
shl eax, 16
|
|
nf10_42:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_43:mov ax, bx
|
|
shl eax, 16
|
|
nf10_44:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_45:mov ax, bx
|
|
shl eax, 16
|
|
nf10_46:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_47:mov ax, bx
|
|
shl eax, 16
|
|
nf10_48:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; edi has advanced eight rows; step back up eight rows and right by
; 8 bytes: edi -= 8*stride - 8.
lea eax, [esi*8-8]
|
|
sub edi, eax
|
|
|
|
; Load bx,dx,cx,bp with four colors
|
|
|
|
; Group 3 colors.
mov esi, [esp]
|
|
Trans16 bx, esi+24
|
|
Trans16 dx, esi+26
|
|
Trans16 cx, esi+28
|
|
Trans16 bp, esi+30
|
|
mov esi, [nf_width]
|
|
|
|
nf10_51:mov ax, bx
|
|
shl eax, 16
|
|
nf10_52:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_53:mov ax, bx
|
|
shl eax, 16
|
|
nf10_54:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_55:mov ax, bx
|
|
shl eax, 16
|
|
nf10_56:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_57:mov ax, bx
|
|
shl eax, 16
|
|
nf10_58:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_61:mov ax, bx
|
|
shl eax, 16
|
|
nf10_62:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_63:mov ax, bx
|
|
shl eax, 16
|
|
nf10_64:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_65:mov ax, bx
|
|
shl eax, 16
|
|
nf10_66:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_67:mov ax, bx
|
|
shl eax, 16
|
|
nf10_68:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; Load bx,dx,cx,bp with four colors
|
|
|
|
; Group 4 colors.
mov esi, [esp]
|
|
Trans16 bx, esi+36
|
|
Trans16 dx, esi+38
|
|
Trans16 cx, esi+40
|
|
Trans16 bp, esi+42
|
|
mov esi, [nf_width]
|
|
|
|
nf10_71:mov ax, bx
|
|
shl eax, 16
|
|
nf10_72:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_73:mov ax, bx
|
|
shl eax, 16
|
|
nf10_74:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_75:mov ax, bx
|
|
shl eax, 16
|
|
nf10_76:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_77:mov ax, bx
|
|
shl eax, 16
|
|
nf10_78:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_81:mov ax, bx
|
|
shl eax, 16
|
|
nf10_82:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_83:mov ax, bx
|
|
shl eax, 16
|
|
nf10_84:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf10_85:mov ax, bx
|
|
shl eax, 16
|
|
nf10_86:mov ax, bx
|
|
mov [edi], eax
|
|
nf10_87:mov ax, bx
|
|
shl eax, 16
|
|
nf10_88:mov ax, bx
|
|
mov [edi+4], eax
|
|
|
|
; Restore the source pointer and ebp, consume the 48 input bytes, undo
; the 8-byte shift to the right, and reposition edi using
; nfpk_back_right.
pop esi
|
|
pop ebp
|
|
add esi, 48
|
|
sub edi, 8
|
|
sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+16
|
|
; nf26 -- decode one block stored as two side-by-side halves, each half being
; 4 pixels wide by 8 rows with a 2-bit colour selector per pixel (32 source bytes).
;   In:   esi -> source data, edi -> destination of the block's first row.
;   Out:  esi advanced by 32; edi moved by -[nfpk_back_right] from the last row.
;   Uses: eax, ebx, ecx, edx; ebp is saved and restored around its use as a colour.
; Source layout as read below:
;   +0..+7   four 16-bit colours for the first half
;   +8..+15  eight selector bytes for the first half (one byte per row)
;   +16..+23 four 16-bit colours for the second half
;   +24..+31 eight selector bytes for the second half
; The routine is self-modifying: each selector byte indexes the dword table
; nfhpk_mov4, and the four bytes of that entry are written over byte +2 of four
; "mov ax, bx" instructions further down (presumably the ModRM byte, so that the
; source register becomes one of bx/dx/cx/bp -- confirm against nfhpk_mov4).
nf26: ; 2x1 4x8x2 (32 bytes)
|
|
|
|
; If bit 15 of the word at +16 is set, this data is handled by nf42 instead.
test word [esi+16], 08000h
|
|
jnz near nf42
|
|
|
|
; eax is cleared once so that "mov al, ..." yields a zero-extended table index.
xor eax, eax
|
|
|
|
; ecx = base of the patch table, edx = address of byte +2 of the instruction at nf26_11.
lea ecx, [nfhpk_mov4]
|
|
lea edx, [nf26_11+2] ; Removed byte ds: - AH
|
|
|
|
; First half, selector bytes +8..+15: each patches the four instructions of one row.
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_11-nf26_11)], bh
|
|
mov [edx+(nf26_12-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_13-nf26_11)], bh
|
|
mov [edx+(nf26_14-nf26_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_15-nf26_11)], bh
|
|
mov [edx+(nf26_16-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_17-nf26_11)], bh
|
|
mov [edx+(nf26_18-nf26_11)], bl
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_21-nf26_11)], bh
|
|
mov [edx+(nf26_22-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_23-nf26_11)], bh
|
|
mov [edx+(nf26_24-nf26_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_25-nf26_11)], bh
|
|
mov [edx+(nf26_26-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_27-nf26_11)], bh
|
|
mov [edx+(nf26_28-nf26_11)], bl
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_31-nf26_11)], bh
|
|
mov [edx+(nf26_32-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_33-nf26_11)], bh
|
|
mov [edx+(nf26_34-nf26_11)], bl
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_35-nf26_11)], bh
|
|
mov [edx+(nf26_36-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_37-nf26_11)], bh
|
|
mov [edx+(nf26_38-nf26_11)], bl
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_41-nf26_11)], bh
|
|
mov [edx+(nf26_42-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_43-nf26_11)], bh
|
|
mov [edx+(nf26_44-nf26_11)], bl
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_45-nf26_11)], bh
|
|
mov [edx+(nf26_46-nf26_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_47-nf26_11)], bh
|
|
mov [edx+(nf26_48-nf26_11)], bl
|
|
|
|
|
|
; Rebase edx onto byte +2 of nf26_51 so the offsets for the second half are
; expressed relative to nf26_51.
lea edx, [edx+(nf26_51-nf26_11)]
|
|
|
|
; Second half, selector bytes +24..+31.
mov al, [esi+24]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_51-nf26_51)], bh
|
|
mov [edx+(nf26_52-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_53-nf26_51)], bh
|
|
mov [edx+(nf26_54-nf26_51)], bl
|
|
|
|
mov al, [esi+25]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_55-nf26_51)], bh
|
|
mov [edx+(nf26_56-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_57-nf26_51)], bh
|
|
mov [edx+(nf26_58-nf26_51)], bl
|
|
|
|
|
|
mov al, [esi+26]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_61-nf26_51)], bh
|
|
mov [edx+(nf26_62-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_63-nf26_51)], bh
|
|
mov [edx+(nf26_64-nf26_51)], bl
|
|
|
|
mov al, [esi+27]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_65-nf26_51)], bh
|
|
mov [edx+(nf26_66-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_67-nf26_51)], bh
|
|
mov [edx+(nf26_68-nf26_51)], bl
|
|
|
|
|
|
mov al, [esi+28]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_71-nf26_51)], bh
|
|
mov [edx+(nf26_72-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_73-nf26_51)], bh
|
|
mov [edx+(nf26_74-nf26_51)], bl
|
|
|
|
mov al, [esi+29]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_75-nf26_51)], bh
|
|
mov [edx+(nf26_76-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_77-nf26_51)], bh
|
|
mov [edx+(nf26_78-nf26_51)], bl
|
|
|
|
|
|
mov al, [esi+30]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_81-nf26_51)], bh
|
|
mov [edx+(nf26_82-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_83-nf26_51)], bh
|
|
mov [edx+(nf26_84-nf26_51)], bl
|
|
|
|
mov al, [esi+31]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf26_85-nf26_51)], bh
|
|
mov [edx+(nf26_86-nf26_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf26_87-nf26_51)], bh
|
|
mov [edx+(nf26_88-nf26_51)], bl
|
|
|
|
; ebp is about to hold a colour and esi the row pitch, so both are saved;
; the saved esi is re-read from [esp] before the second half.
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
|
|
|
|
; Trans16 / Trans16_3 are macros defined elsewhere in this file; presumably they
; load (and, with TRANS16 enabled, translate) the 16-bit colour at the given
; address. NOTE(review): the extra argument 1 on the first colour presumably
; deals with its bit 15 being used as a format flag -- confirm against the macro.
Trans16_3 bx, esi, 1
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4
|
|
Trans16 bp, esi+6
|
|
|
|
; From here on esi is the destination row pitch in bytes.
mov esi, [nf_width]
|
|
|
|
; Jump into the code that was just patched.
jmp nf26_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf26_0:
|
|
|
|
; First half: 8 rows. Every "mov ax, bx" below is a patch target; each row
; assembles two dwords (two pixels each) and stores them at [edi] and [edi+4],
; then steps edi down one row.
nf26_11:mov ax, bx
|
|
shl eax, 16
|
|
nf26_12:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_13:mov ax, bx
|
|
shl eax, 16
|
|
nf26_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_15:mov ax, bx
|
|
shl eax, 16
|
|
nf26_16:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_17:mov ax, bx
|
|
shl eax, 16
|
|
nf26_18:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_21:mov ax, bx
|
|
shl eax, 16
|
|
nf26_22:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_23:mov ax, bx
|
|
shl eax, 16
|
|
nf26_24:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_25:mov ax, bx
|
|
shl eax, 16
|
|
nf26_26:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_27:mov ax, bx
|
|
shl eax, 16
|
|
nf26_28:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_31:mov ax, bx
|
|
shl eax, 16
|
|
nf26_32:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_33:mov ax, bx
|
|
shl eax, 16
|
|
nf26_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_35:mov ax, bx
|
|
shl eax, 16
|
|
nf26_36:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_37:mov ax, bx
|
|
shl eax, 16
|
|
nf26_38:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_41:mov ax, bx
|
|
shl eax, 16
|
|
nf26_42:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_43:mov ax, bx
|
|
shl eax, 16
|
|
nf26_44:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_45:mov ax, bx
|
|
shl eax, 16
|
|
nf26_46:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_47:mov ax, bx
|
|
shl eax, 16
|
|
nf26_48:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
; edi has advanced eight rows; subtracting 8*pitch-8 returns it to the first
; row, 8 bytes to the right, where the second half starts.
lea eax, [esi*8-8]
|
|
sub edi, eax
|
|
|
|
; Load bx,dx,cx,bp with four colors
|
|
|
|
; Recover the source pointer saved by "push esi" without popping it.
mov esi, [esp]
|
|
; Bit 15 of the word at +16 is known to be clear on this path (tested at entry),
; which is presumably why plain Trans16 is used for the first colour here.
Trans16 bx, esi+16
|
|
Trans16 dx, esi+18
|
|
Trans16 cx, esi+20
|
|
Trans16 bp, esi+22
|
|
mov esi, [nf_width]
|
|
|
|
; Second half: 8 rows, same shape as above; the last row does not step edi.
nf26_51:mov ax, bx
|
|
shl eax, 16
|
|
nf26_52:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_53:mov ax, bx
|
|
shl eax, 16
|
|
nf26_54:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_55:mov ax, bx
|
|
shl eax, 16
|
|
nf26_56:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_57:mov ax, bx
|
|
shl eax, 16
|
|
nf26_58:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_61:mov ax, bx
|
|
shl eax, 16
|
|
nf26_62:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_63:mov ax, bx
|
|
shl eax, 16
|
|
nf26_64:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_65:mov ax, bx
|
|
shl eax, 16
|
|
nf26_66:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_67:mov ax, bx
|
|
shl eax, 16
|
|
nf26_68:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_71:mov ax, bx
|
|
shl eax, 16
|
|
nf26_72:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_73:mov ax, bx
|
|
shl eax, 16
|
|
nf26_74:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_75:mov ax, bx
|
|
shl eax, 16
|
|
nf26_76:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_77:mov ax, bx
|
|
shl eax, 16
|
|
nf26_78:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_81:mov ax, bx
|
|
shl eax, 16
|
|
nf26_82:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_83:mov ax, bx
|
|
shl eax, 16
|
|
nf26_84:mov ax, bx
|
|
mov [edi+4], eax
|
|
add edi, esi
|
|
|
|
nf26_85:mov ax, bx
|
|
shl eax, 16
|
|
nf26_86:mov ax, bx
|
|
mov [edi], eax
|
|
nf26_87:mov ax, bx
|
|
shl eax, 16
|
|
nf26_88:mov ax, bx
|
|
mov [edi+4], eax
|
|
|
|
; Restore the source pointer and ebp, consume the 32 source bytes, undo the
; 8-byte column offset of the second half, then apply the shared end-of-block
; destination adjustment.
pop esi
|
|
pop ebp
|
|
add esi, 32
|
|
sub edi, 8
|
|
sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
;nf10+32
|
|
; nf42 -- decode one block stored as two stacked halves, each half being
; 8 pixels wide by 4 rows with a 2-bit colour selector per pixel (32 source bytes).
; Entered from nf26 when bit 15 of the word at [esi+16] is set.
;   In:   esi -> source data, edi -> destination of the block's first row.
;   Out:  esi advanced by 32; edi moved by -[nfpk_back_right] from the last row.
;   Uses: eax, ebx, ecx, edx; ebp is saved and restored around its use as a colour.
; Source layout as read below:
;   +0..+7   four 16-bit colours for the upper half
;   +8..+15  eight selector bytes for the upper half (two bytes per row)
;   +16..+23 four 16-bit colours for the lower half
;   +24..+31 eight selector bytes for the lower half
; Like nf26 this is self-modifying: each selector byte indexes the dword table
; nfhpk_mov4 and the four bytes of the entry overwrite byte +2 of four
; "mov ax, bx" instructions below (presumably the ModRM byte that picks the
; source register -- confirm against nfhpk_mov4).
nf42: ; 1x2 8x4x2 (32 bytes)
|
|
|
|
; eax is cleared once so that "mov al, ..." yields a zero-extended table index.
xor eax, eax
|
|
|
|
; ecx = base of the patch table, edx = address of byte +2 of the instruction at nf42_11.
lea ecx, [nfhpk_mov4]
|
|
lea edx, [nf42_11+2] ; removed byte ds: - AH
|
|
|
|
; Upper half, selector bytes +8..+15: each byte patches four instructions,
; so each pair of bytes covers one 8-pixel row.
mov al, [esi+8]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_11-nf42_11)], bh
|
|
mov [edx+(nf42_12-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_13-nf42_11)], bh
|
|
mov [edx+(nf42_14-nf42_11)], bl
|
|
|
|
mov al, [esi+9]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_15-nf42_11)], bh
|
|
mov [edx+(nf42_16-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_17-nf42_11)], bh
|
|
mov [edx+(nf42_18-nf42_11)], bl
|
|
|
|
|
|
mov al, [esi+10]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_21-nf42_11)], bh
|
|
mov [edx+(nf42_22-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_23-nf42_11)], bh
|
|
mov [edx+(nf42_24-nf42_11)], bl
|
|
|
|
mov al, [esi+11]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_25-nf42_11)], bh
|
|
mov [edx+(nf42_26-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_27-nf42_11)], bh
|
|
mov [edx+(nf42_28-nf42_11)], bl
|
|
|
|
|
|
mov al, [esi+12]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_31-nf42_11)], bh
|
|
mov [edx+(nf42_32-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_33-nf42_11)], bh
|
|
mov [edx+(nf42_34-nf42_11)], bl
|
|
|
|
mov al, [esi+13]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_35-nf42_11)], bh
|
|
mov [edx+(nf42_36-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_37-nf42_11)], bh
|
|
mov [edx+(nf42_38-nf42_11)], bl
|
|
|
|
|
|
mov al, [esi+14]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_41-nf42_11)], bh
|
|
mov [edx+(nf42_42-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_43-nf42_11)], bh
|
|
mov [edx+(nf42_44-nf42_11)], bl
|
|
|
|
mov al, [esi+15]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_45-nf42_11)], bh
|
|
mov [edx+(nf42_46-nf42_11)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_47-nf42_11)], bh
|
|
mov [edx+(nf42_48-nf42_11)], bl
|
|
|
|
|
|
; Rebase edx onto byte +2 of nf42_51 so the offsets for the lower half are
; expressed relative to nf42_51.
lea edx, [edx+(nf42_51-nf42_11)]
|
|
|
|
; Lower half, selector bytes +24..+31.
mov al, [esi+24]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_51-nf42_51)], bh
|
|
mov [edx+(nf42_52-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_53-nf42_51)], bh
|
|
mov [edx+(nf42_54-nf42_51)], bl
|
|
|
|
mov al, [esi+25]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_55-nf42_51)], bh
|
|
mov [edx+(nf42_56-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_57-nf42_51)], bh
|
|
mov [edx+(nf42_58-nf42_51)], bl
|
|
|
|
|
|
mov al, [esi+26]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_61-nf42_51)], bh
|
|
mov [edx+(nf42_62-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_63-nf42_51)], bh
|
|
mov [edx+(nf42_64-nf42_51)], bl
|
|
|
|
mov al, [esi+27]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_65-nf42_51)], bh
|
|
mov [edx+(nf42_66-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_67-nf42_51)], bh
|
|
mov [edx+(nf42_68-nf42_51)], bl
|
|
|
|
|
|
mov al, [esi+28]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_71-nf42_51)], bh
|
|
mov [edx+(nf42_72-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_73-nf42_51)], bh
|
|
mov [edx+(nf42_74-nf42_51)], bl
|
|
|
|
mov al, [esi+29]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_75-nf42_51)], bh
|
|
mov [edx+(nf42_76-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_77-nf42_51)], bh
|
|
mov [edx+(nf42_78-nf42_51)], bl
|
|
|
|
|
|
mov al, [esi+30]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_81-nf42_51)], bh
|
|
mov [edx+(nf42_82-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_83-nf42_51)], bh
|
|
mov [edx+(nf42_84-nf42_51)], bl
|
|
|
|
mov al, [esi+31]
|
|
mov ebx, [ecx+eax*4]
|
|
mov [edx+(nf42_85-nf42_51)], bh
|
|
mov [edx+(nf42_86-nf42_51)], bl
|
|
shr ebx, 16
|
|
mov [edx+(nf42_87-nf42_51)], bh
|
|
mov [edx+(nf42_88-nf42_51)], bl
|
|
|
|
; ebp is about to hold a colour and esi the row pitch, so both are saved;
; the saved esi is re-read from [esp] before the lower half.
push ebp
|
|
push esi
|
|
; Load bx,dx,cx,bp with four colors
|
|
|
|
; Trans16 / Trans16_3 are macros defined elsewhere in this file; presumably they
; load (and, with TRANS16 enabled, translate) the 16-bit colour at the given
; address. NOTE(review): the extra argument 1 presumably deals with bit 15 of
; the first colour being used as a format flag -- confirm against the macro.
Trans16_3 bx, esi, 1
|
|
Trans16 dx, esi+2
|
|
Trans16 cx, esi+4
|
|
Trans16 bp, esi+6
|
|
|
|
; From here on esi is the destination row pitch in bytes.
mov esi, [nf_width]
|
|
|
|
; Jump into the code that was just patched.
jmp nf42_0 ; flush prefetch
|
|
ALIGN 4
|
|
nf42_0:
|
|
|
|
; Upper half: 4 rows. Every "mov ax, bx" below is a patch target; each row
; assembles four dwords (two pixels each) and stores them at [edi]..[edi+12],
; then steps edi down one row.
nf42_11:mov ax, bx
|
|
shl eax, 16
|
|
nf42_12:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_13:mov ax, bx
|
|
shl eax, 16
|
|
nf42_14:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_15:mov ax, bx
|
|
shl eax, 16
|
|
nf42_16:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_17:mov ax, bx
|
|
shl eax, 16
|
|
nf42_18:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf42_21:mov ax, bx
|
|
shl eax, 16
|
|
nf42_22:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_23:mov ax, bx
|
|
shl eax, 16
|
|
nf42_24:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_25:mov ax, bx
|
|
shl eax, 16
|
|
nf42_26:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_27:mov ax, bx
|
|
shl eax, 16
|
|
nf42_28:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf42_31:mov ax, bx
|
|
shl eax, 16
|
|
nf42_32:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_33:mov ax, bx
|
|
shl eax, 16
|
|
nf42_34:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_35:mov ax, bx
|
|
shl eax, 16
|
|
nf42_36:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_37:mov ax, bx
|
|
shl eax, 16
|
|
nf42_38:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf42_41:mov ax, bx
|
|
shl eax, 16
|
|
nf42_42:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_43:mov ax, bx
|
|
shl eax, 16
|
|
nf42_44:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_45:mov ax, bx
|
|
shl eax, 16
|
|
nf42_46:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_47:mov ax, bx
|
|
shl eax, 16
|
|
nf42_48:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
; Load bx,dx,cx,bp with four colors
|
|
|
|
; Recover the source pointer saved by "push esi" without popping it.
mov esi, [esp]
|
|
; Bit 15 of the word at +16 is set on this path (that is what sent nf26 here),
; so the first colour goes through Trans16_3 with the extra argument.
Trans16_3 bx, esi+16, 1
|
|
Trans16 dx, esi+18
|
|
Trans16 cx, esi+20
|
|
Trans16 bp, esi+22
|
|
mov esi, [nf_width]
|
|
|
|
; Lower half: 4 rows, same shape as above; the last row does not step edi.
nf42_51:mov ax, bx
|
|
shl eax, 16
|
|
nf42_52:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_53:mov ax, bx
|
|
shl eax, 16
|
|
nf42_54:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_55:mov ax, bx
|
|
shl eax, 16
|
|
nf42_56:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_57:mov ax, bx
|
|
shl eax, 16
|
|
nf42_58:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf42_61:mov ax, bx
|
|
shl eax, 16
|
|
nf42_62:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_63:mov ax, bx
|
|
shl eax, 16
|
|
nf42_64:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_65:mov ax, bx
|
|
shl eax, 16
|
|
nf42_66:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_67:mov ax, bx
|
|
shl eax, 16
|
|
nf42_68:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf42_71:mov ax, bx
|
|
shl eax, 16
|
|
nf42_72:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_73:mov ax, bx
|
|
shl eax, 16
|
|
nf42_74:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_75:mov ax, bx
|
|
shl eax, 16
|
|
nf42_76:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_77:mov ax, bx
|
|
shl eax, 16
|
|
nf42_78:mov ax, bx
|
|
mov [edi+12], eax
|
|
add edi, esi
|
|
|
|
nf42_81:mov ax, bx
|
|
shl eax, 16
|
|
nf42_82:mov ax, bx
|
|
mov [edi], eax
|
|
nf42_83:mov ax, bx
|
|
shl eax, 16
|
|
nf42_84:mov ax, bx
|
|
mov [edi+4], eax
|
|
nf42_85:mov ax, bx
|
|
shl eax, 16
|
|
nf42_86:mov ax, bx
|
|
mov [edi+8], eax
|
|
nf42_87:mov ax, bx
|
|
shl eax, 16
|
|
nf42_88:mov ax, bx
|
|
mov [edi+12], eax
|
|
|
|
; Restore the source pointer and ebp, consume the 32 source bytes, then apply
; the shared end-of-block destination adjustment.
pop esi
|
|
pop ebp
|
|
add esi, 32
|
|
sub edi, [nfpk_back_right] ;br
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf11 -- copy a block given as 64 individual 16-bit pixels (128 source bytes),
; passing every pixel through the Trans16 macro.
;   In:   esi -> source data, edi -> destination of the block's first row.
;   Out:  esi advanced by 128; edi moved by -[nfpk_back_right] from the last row.
;   Uses: bx, edx (plus whatever Trans16 touches -- the macro is defined elsewhere).
nf11: ; 8x8x16 (128 bytes)
|
|
; edx = destination row pitch in bytes.
mov edx, [nf_width]
|
|
|
|
; Trans16Blk idx: emit one row -- eight 16-bit pixels read from idx+0..idx+14
; via Trans16 and stored to [edi+0]..[edi+14].
%macro Trans16Blk 1 ; MACRO idx
|
|
Trans16 bx, %1 ;idx
|
|
mov [edi], bx
|
|
Trans16 bx, (%1 + 2) ;idx+2
|
|
mov [edi+2], bx
|
|
Trans16 bx, (%1 + 4) ;idx+4
|
|
mov [edi+4], bx
|
|
Trans16 bx, (%1 + 6) ;idx+6
|
|
mov [edi+6], bx
|
|
Trans16 bx, (%1 + 8) ;idx+8
|
|
mov [edi+8], bx
|
|
Trans16 bx, (%1 + 10) ;idx+10
|
|
mov [edi+10], bx
|
|
Trans16 bx, (%1 + 12) ;idx+12
|
|
mov [edi+12], bx
|
|
Trans16 bx, (%1 + 14) ;idx+14
|
|
mov [edi+14], bx
|
|
%endmacro
|
|
|
|
; Eight rows, 16 source bytes each; edi steps one row between them but not
; after the last.
Trans16Blk esi ;0
|
|
add edi, edx
|
|
Trans16Blk esi+16 ;1
|
|
add edi, edx
|
|
Trans16Blk esi+32 ;2
|
|
add edi, edx
|
|
Trans16Blk esi+48 ;3
|
|
add edi, edx
|
|
Trans16Blk esi+64 ;4
|
|
add edi, edx
|
|
Trans16Blk esi+80 ;5
|
|
add edi, edx
|
|
Trans16Blk esi+96 ;6
|
|
add edi, edx
|
|
Trans16Blk esi+112 ;7
|
|
|
|
; Consume the source bytes and apply the shared end-of-block destination adjustment.
add esi, 128
|
|
sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf12 -- expand 16 source pixels (4 per source row, 4 source rows, 32 bytes)
; so that each one fills a 2x2 square of the destination block.
;   In:   esi -> source data, edi -> destination of the block's first row.
;   Out:  esi advanced by 32; edi moved by -[nfpk_back_right] from the last row.
;   Uses: eax, ebx, edx (plus whatever Trans16 touches -- macro defined elsewhere).
; Per pixel: Trans16 puts the colour in bx, "shrd eax, ebx, 16" moves bx into the
; top half of eax and "mov ax, bx" fills the bottom half, so eax holds the colour
; twice; that dword is then written to the current row and to the row below it.
nf12: ; low 4x4x16 (32 bytes)
|
|
|
|
; edx = destination row pitch in bytes.
mov edx, [nf_width]
|
|
|
|
; Source pixels +0..+6 -> destination rows 0 and 1.
Trans16 bx, esi
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
Trans16 bx, esi+2
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
|
|
Trans16 bx, esi+4
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
|
|
Trans16 bx, esi+6
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
|
|
; Step down two destination rows.
lea edi, [edi+edx*2]
|
|
|
|
; Source pixels +8..+14 -> destination rows 2 and 3.
Trans16 bx, esi+8
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
Trans16 bx, esi+10
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
|
|
Trans16 bx, esi+12
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
|
|
Trans16 bx, esi+14
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
|
|
lea edi, [edi+edx*2]
|
|
|
|
; Source pixels +16..+22 -> destination rows 4 and 5.
Trans16 bx, esi+16
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
Trans16 bx, esi+18
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
|
|
Trans16 bx, esi+20
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
|
|
Trans16 bx, esi+22
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
|
|
lea edi, [edi+edx*2]
|
|
|
|
; Source pixels +24..+30 -> destination rows 6 and 7.
Trans16 bx, esi+24
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi], eax
|
|
mov [edi+edx], eax
|
|
|
|
Trans16 bx, esi+26
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+4], eax
|
|
mov [edi+edx+4], eax
|
|
|
|
Trans16 bx, esi+28
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+8], eax
|
|
mov [edi+edx+8], eax
|
|
|
|
Trans16 bx, esi+30
|
|
shrd eax, ebx, 16
|
|
mov ax, bx
|
|
mov [edi+12], eax
|
|
mov [edi+edx+12], eax
|
|
|
|
; Move edi onto the last row written (row 7) before the shared adjustment.
add edi, edx
|
|
|
|
sub edi, [nfpk_back_right] ;br
|
|
add esi, 32
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf13 -- fill the block from four source colours (8 bytes), one per quadrant:
; colours +0 and +2 cover the left and right 8 bytes of rows 0-3, colours +4
; and +6 cover the left and right 8 bytes of rows 4-7.
;   In:   esi -> source data, edi -> destination of the block's first row.
;   Out:  esi advanced by 8; edi moved by -[nfpk_back_right] from the last row.
;   Uses: eax, ebx, ecx, edx (plus whatever Trans16 touches -- macro defined elsewhere).
nf13: ; 2x2 4x4x0 (8 bytes)
|
|
; edx = destination row pitch in bytes.
mov edx, [nf_width]
|
|
|
|
; ebx = first colour duplicated into both 16-bit halves.
Trans16 cx, esi
|
|
shrd ebx, ecx, 16
|
|
mov bx, cx
|
|
|
|
; ecx = second colour duplicated into both halves (built in eax because cx is
; the register being read).
Trans16 cx, esi+2
|
|
shrd eax, ecx, 16
|
|
mov ax, cx
|
|
mov ecx, eax
|
|
|
|
; Rows 0-3: ebx to bytes 0..7, ecx to bytes 8..15, two rows per step.
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
mov [edi+8], ecx
|
|
mov [edi+12], ecx
|
|
mov [edi+edx], ebx
|
|
mov [edi+edx+4], ebx
|
|
mov [edi+edx+8], ecx
|
|
mov [edi+edx+12], ecx
|
|
lea edi, [edi+edx*2]
|
|
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
mov [edi+8], ecx
|
|
mov [edi+12], ecx
|
|
mov [edi+edx], ebx
|
|
mov [edi+edx+4], ebx
|
|
mov [edi+edx+8], ecx
|
|
mov [edi+edx+12], ecx
|
|
lea edi, [edi+edx*2]
|
|
|
|
; Same again with the third and fourth colours for rows 4-7.
Trans16 cx, esi+4
|
|
shrd ebx, ecx, 16
|
|
mov bx, cx
|
|
|
|
Trans16 cx, esi+6
|
|
shrd eax, ecx, 16
|
|
mov ax, cx
|
|
mov ecx, eax
|
|
|
|
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
mov [edi+8], ecx
|
|
mov [edi+12], ecx
|
|
mov [edi+edx], ebx
|
|
mov [edi+edx+4], ebx
|
|
mov [edi+edx+8], ecx
|
|
mov [edi+edx+12], ecx
|
|
lea edi, [edi+edx*2]
|
|
mov [edi], ebx
|
|
mov [edi+4], ebx
|
|
mov [edi+8], ecx
|
|
mov [edi+12], ecx
|
|
mov [edi+edx], ebx
|
|
mov [edi+edx+4], ebx
|
|
mov [edi+edx+8], ecx
|
|
mov [edi+edx+12], ecx
; Move edi onto the last row written (row 7) before the shared adjustment.
|
|
add edi, edx
|
|
|
|
sub edi, [nfpk_back_right] ; br
|
|
add esi, 8
|
|
|
|
retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf14 -- 8x8x0 (2 bytes): fill the whole block with a single colour.
;   In:   esi -> 2-byte colour, edi -> destination of the block's first row.
;   Out:  esi advanced by 2; edi moved by -[nfpk_back_right] from the last row.
;   Uses: ebx, ecx, edx (plus whatever Trans16 touches -- macro defined elsewhere).
nf14:
        Trans16 cx, esi
        add     esi, 2
        shrd    ebx, ecx, 16            ; top half of ebx <- cx
        mov     bx, cx                  ; bottom half too: ebx = cx:cx

; nf_solid -- store ebx across 16 bytes of each of 8 rows spaced [nf_width]
; bytes apart, then apply the end-of-block destination adjustment.
;   In:   ebx = dword to store, edi -> first row.
nf_solid:
        mov     edx, [nf_width]         ; edx = row pitch in bytes

        ; Rows 0..6: write the row, then step down to the next one.
%rep 7
        mov     [edi], ebx
        mov     [edi+4], ebx
        mov     [edi+8], ebx
        mov     [edi+12], ebx
        add     edi, edx
%endrep

        ; Row 7: written without stepping, so edi stays on the last row.
        mov     [edi], ebx
        mov     [edi+4], ebx
        mov     [edi+8], ebx
        mov     [edi+12], ebx

        sub     edi, [nfpk_back_right]  ; shared end-of-block adjustment
        retn
|
|
|
|
;----------------------------------------
|
|
ALIGN 4
|
|
; nf15 -- placeholder handler: returns immediately without touching esi or edi.
nf15: ; unused
|
|
retn
|
|
|
|
;nfHPkDecomp ENDP
|
|
|
|
|