; mvelibwa.c
;
; Interplay Movie (MVE) File Player Library (32-Bit Win95 Version)
; Assembly Language Components
; Written by Paul Allen Edelstein
;
; (c) 1997 Interplay Productions. All Rights Reserved.
; This file is confidential and consists of proprietary information
; of Interplay Productions. This file and associated libraries
; may not, in whole or in part, be disclosed to third parties,
; incorporated into any software product which is not being created
; for Interplay Productions, copied or duplicated in any form,
; without the prior written permission of Interplay Productions.
; Further, you may not reverse engineer, decompile or otherwise
; attempt to derive source code of this material.
;

;;--- Options ---
; Feature switches carried over from the original source.
; NOTE(review): no conditional assembly testing these symbols is visible in
; the routines that follow -- confirm whether they are still consulted.

ONLYNEW		equ 0	; For debug, disables motion comp
LOGGING		equ 0	; Log timing statistics
PARTIAL		equ 1	; Support for partial updates
PKDATA		equ 1	; Support for packed data
HICOLOR		equ 1	; Support for HiColor
INTERP		equ 0	; Interpolated squares
			; 0:none (4x4x8), 1:generic dither,
			; 2:direction dither, 3:blend
COMPOPS		equ 1	; Compressed opcode table
SCALING		equ 1	; Scaling support
DECOMPD		equ 0	; Support for dithered half vert res
TRANS16		equ 1	; Support for translating 16-bit rgb format

;;--- Constants ---

; Width and height of sections in pixels.
SWIDTH		equ 8
SHEIGHT		equ 8

; Base-2 logarithms of the two values above.  NF_DECOMP_INIT multiplies by
; SWIDTH/SHEIGHT with shifts by these counts, so the pairs must stay in sync.
LOG2_SWIDTH	equ 3
LOG2_SHEIGHT	equ 3

;;---
; Symbols defined outside this file.  The trailing comment on each line
; records the C declaration the symbol is expected to match.

EXTERN pal_tbl		;:BYTE ; unsigned char pal_tbl[3*256];
EXTERN pal15_tbl	;:WORD ; unsigned short pal15_tbl[256];

; Lookup tables indexed by the low / high byte of a 16-bit pixel
; (see the Trans16 macros, which OR one entry from each together).
EXTERN nf_trans16_lo
EXTERN nf_trans16_hi

EXTERN snd_8to16	;: WORD ; short snd_8to16[256];

EXTERN nf_buf_cur	;: PTRBYTE ; unsigned char* nf_buf_cur;
EXTERN nf_buf_prv	;: PTRBYTE ; unsigned char* nf_buf_prv;

;; NextFrame parameters
EXTERN nf_wqty		;: BYTE ;unsigned char nf_wqty; // (width/SWIDTH)
EXTERN nf_hqty		;: BYTE ;unsigned char nf_hqty; // (height/SHEIGHT)
EXTERN nf_fqty		;: BYTE ;unsigned char nf_fqty; // Number of fields
EXTERN nf_hicolor	;: DWORD ;unsigned nf_hicolor; // HiColor (0:none,1:normal,2:swapped)
;;
EXTERN nf_width		;: DWORD ;unsigned nf_width; // wqty * SWIDTH
EXTERN nf_height	;: DWORD ;unsigned nf_height; // hqty * SHEIGHT;
EXTERN nf_new_line	;: DWORD ;unsigned nf_new_line; // width - SWIDTH
EXTERN nf_new_row0	;: DWORD ;unsigned nf_new_row0; // SHEIGHT*width*2-width
EXTERN nf_back_right	;: DWORD ;unsigned nf_back_right; // (SHEIGHT-1)*width

;; Frame parameters
;; Portion of current frame which has been updated
;; and needs to be sent to screen.
;;
; Written by NF_DECOMP_INIT from the x/y/w/h parameters.
EXTERN nf_new_x		;: DWORD ;unsigned nf_new_x;
EXTERN nf_new_y		;: DWORD ;unsigned nf_new_y;
EXTERN nf_new_w		;: DWORD ;unsigned nf_new_w;
EXTERN nf_new_h		;: DWORD ;unsigned nf_new_h;

; These are all of our global parameter-passing variables - AH
; (the routines below read their arguments from these instead of the stack
; and store their results in the *_return variables)
extern sndDecompM16_dst
extern sndDecompM16_src
extern sndDecompM16_len
extern sndDecompM16_prev
extern sndDecompM16_return

extern sndDecompS16_dst
extern sndDecompS16_src
extern sndDecompS16_len
extern sndDecompS16_prev
extern sndDecompS16_return

extern nfHPkDecomp_ops
extern nfHPkDecomp_comp
extern nfHPkDecomp_x
extern nfHPkDecomp_y
extern nfHPkDecomp_w
extern nfHPkDecomp_h

; This is the global array of pointers to memory locations that
; need to be self-modified - AH
extern global_unlock_memory_pointers

; These are our functions that the C stubs call - AH
global _asm_sndDecompM16
global _asm_sndDecompS16
global _asm_nfPkConfig
global _asm_nfHPkDecomp

; This is our memory "unlock" function for the self-modifying asm - AH
global _asm_selfModify

SECTION .data

; Unlabelled copyright notice embedded at the start of the data section.
db "(c) 1997 Interplay Productions. All Rights Reserved.\n"
db "This file is confidential and consists of proprietary information\n"
db "of Interplay Productions. This file and associated libraries\n"
db "may not, in whole or in part, be disclosed to third parties,\n"
db "incorporated into any software product which is not being created\n"
db "for Interplay Productions, copied or duplicated in any form,\n"
db "without the prior written permission of Interplay Productions.\n"
db "Further, you may not reverse engineer, decompile or otherwise\n"
db "attempt to derive source code of this material.\n",0

; Scratch variables of _asm_nfHPkDecomp.  They were PROC-local in the original
; source (see the LOCAL remarks) and are file-level statics in this port.
tbuf		dd 0	;LOCAL tbuf : PTRBYTE
new_row		dd 0	;LOCAL new_row :DWORD
DiffBufPtrs	dd 0	;LOCAL DiffBufPtrs :DWORD
nfpk_back_right	dd 0	;LOCAL nfpk_back_right : DWORD
wcnt		dd 0	;LOCAL wcnt :DWORD
bcomp		dd 0	;LOCAL bcomp :PTRBYTE

; Handler table indexed by a 4-bit opcode; _asm_nfHPkDecomp dispatches through
; it once for each nibble of every opcode byte.
nfhpk_OpTbl:	;label dword
	dd	nf0	;dword offset nf0 ; Prev Same (0)
	dd	nf1	;dword offset nf1 ; No change (and copied to screen) (0)
	dd	nf2	;dword offset nf2 ; Near shift from older part of current buf (1)
	dd	nf3	;dword offset nf3 ; Near shift from newer part of current buf (1)
	dd	nf4	;dword offset nf4 ; Near shift from previous buffer (1)
	dd	nf5	;dword offset nf5 ; Far shift from previous buffer (2)
	dd	nf6	;dword offset nf6 ; Far shift from current buffer (2)
			; [Or if COMPOPS, run of no changes (0)]
	dd	nf7	;dword offset nf7 ; 8x8x1 (10 bytes) or low 4x4x1 (4 bytes)
	dd	nf8	;dword offset nf8 ; 2x2 4x4x1 (16 bytes) or 2x1 4x8x1 (12 bytes) or 1x2 8x4x1 (12 bytes)
	dd	nf9	;dword offset nf9 ; 8x8x2 (20 bytes) or low 4x4x2 (8 bytes) or
			; low 4x8x2 (12 bytes) or low 8x4x2 (12 bytes)
	; NOTE(review): by analogy with the nf8 entry, the last variant below is
	; presumably "1x2 8x4x2" -- confirm.
	dd	nf10	;dword offset nf10 ; 2x2 4x4x2 (32 bytes) or 2x1 4x8x2 (24 bytes) or 1x2 4x8x2 (24 bytes)
	dd	nf11	;dword offset nf11 ; 8x8x8 (64 bytes)
	dd	nf12	;dword offset nf12 ; low 4x4x8 (16 bytes)
	dd	nf13	;dword offset nf13 ; 2x2 4x4x0 (ie 2x2x8) (4 bytes)
	dd	nf14	;dword offset nf14 ; 8x8x0 (1 byte)
	dd	nf15	;dword offset nf15 ; mix 8x8x0 (2 bytes)

; signed 8-bit y * nf_width
; Filled in at run time by _asm_nfPkConfig: entries 0..127 receive
; index*nf_width, entries 128..255 receive (index-256)*nf_width.
nfpk_ShiftY	times 256 dd 0

; Constant tables

; 8-bit -8:7 x nf_width + -8:7
; 256 (x,y) byte pairs, x varying fastest, both running from -8 to 7.
nfpk_ShiftP1
%assign y -8
%rep 16 ;16
  %assign x -8
  %rep 16 ;16
	db	x,y
    %assign x x+1
  %endrep
  %assign y y+1
%endrep

; 8-bit to right and below in roughly 0:14*nf_width + -14:14 (-3 cases)
; negative is
; 8-bit to left and above in roughly -14:0*nf_width + -14:14 (-3 cases)
; 256 (x,y) byte pairs in three groups: 8 rows of 7, 6 rows of 29, 1 row of 26.
nfpk_ShiftP2
; y = 0..7, x = 8..14
%assign y 0
%rep 8 ;8
  %assign x 8
  %rep 7 ;7
	db	x,y
    %assign x x+1
  %endrep
  %assign y y+1
%endrep

; y = 8..13, x = -14..-1 followed by x = 0..14
%assign y 8
%rep 6 ;6
  %assign x -14
  %rep 14 ;14
	db	x,y
    %assign x x+1
  %endrep
  %assign x 0
  %rep 15 ;15
	db	x,y
    %assign x x+1
  %endrep
  %assign y y+1
%endrep

; y = 14, x = -14..-1 followed by x = 0..11
%assign x -14
%rep 14 ;14
	db	x,14
  %assign x x+1
%endrep
%assign x 0
%rep 12 ;12
	db	x,14
  %assign x x+1
%endrep

; Constant tables

; Patch bytes for the low 4x4x1 handler (nf23), which indexes this table with
; a 4-bit value and writes the four bytes of the entry at offset +1 of four
; "mov eax, ebx" instructions.  Byte k of an entry is 0c1h (ecx) when bit k of
; the index is set and 0c3h (ebx) otherwise.
; NOTE(review): 0c3h/0c1h are register-to-register ModRM bytes with eax in the
; reg field, so they only mean "mov eax, ebx/ecx" after an 8Bh opcode.  Confirm
; that the assembler in use emits 8B C3 for the patched "mov eax, ebx"
; instructions; with the alternative 89 D8 encoding the patched byte would
; select a different operation.
nfhpk_mov4l ;LABEL DWORD
; low 4x1 in 8x1 (patch +1)
; mov eax, ebx/ecx
	db	0c0h+3, 0c0h+3, 0c0h+3, 0c0h+3
	db	0c0h+1, 0c0h+3, 0c0h+3, 0c0h+3
	db	0c0h+3, 0c0h+1, 0c0h+3, 0c0h+3
	db	0c0h+1, 0c0h+1, 0c0h+3, 0c0h+3
	db	0c0h+3, 0c0h+3, 0c0h+1, 0c0h+3
	db	0c0h+1, 0c0h+3, 0c0h+1, 0c0h+3
	db	0c0h+3, 0c0h+1, 0c0h+1, 0c0h+3
	db	0c0h+1, 0c0h+1, 0c0h+1, 0c0h+3
	db	0c0h+3, 0c0h+3, 0c0h+3, 0c0h+1
	db	0c0h+1, 0c0h+3, 0c0h+3, 0c0h+1
	db	0c0h+3, 0c0h+1, 0c0h+3, 0c0h+1
	db	0c0h+1, 0c0h+1, 0c0h+3, 0c0h+1
	db	0c0h+3, 0c0h+3, 0c0h+1, 0c0h+1
	db	0c0h+1, 0c0h+3, 0c0h+1, 0c0h+1
	db	0c0h+3, 0c0h+1, 0c0h+1, 0c0h+1
	db	0c0h+1, 0c0h+1, 0c0h+1, 0c0h+1

nfhpk_mov8 ;LABEL DWORD
; 8x1 (each two bits select a pair of colors in a reg)
; low 4x2 in 8x2 (each two bits select a duplicated color in reg)
; (patch +1)
; mov ds:[edi+0/4/8/12], ebx/edx/ecx/ebp
; Note: Patched code specifies mov [ebp+0]... instead
; of mov [edi+0]... to ensure that 8-bit offsets are
; used by the assembler even for offset of zero.
; Generator for the 256 dword entries of nfhpk_mov8 (its label and description
; directly precede this point).  Every byte is m+047h, with m stepping through
; 24, 16, 8, 40; m1 (first byte of an entry) varies fastest and m4 slowest.
; The resulting values 5Fh/57h/4Fh/6Fh are patched at offset +1 of
; "mov [ebp+disp8], ebx" instructions by the nf7/nf8/nf24/nf40 handlers.
%assign m4 24
%rep 4
  %assign m3 24
  %rep 4
    %assign m2 24
    %rep 4
      %assign m1 24
      %rep 4
	db	m1+047h,m2+047h,m3+047h,m4+047h
        %if (m1 == 24)
          %assign m1 16
        %elif (m1 == 16)
          %assign m1 8
        %elif (m1 == 8)
          %assign m1 40
        %endif
      %endrep
      %if (m2 == 24)
        %assign m2 16
      %elif (m2 == 16)
        %assign m2 8
      %elif (m2 == 8)
        %assign m2 40
      %endif
    %endrep
    %if (m3 == 24)
      %assign m3 16
    %elif (m3 == 16)
      %assign m3 8
    %elif (m3 == 8)
      %assign m3 40
    %endif
  %endrep
  %if (m4 == 24)
    %assign m4 16
  %elif (m4 == 16)
    %assign m4 8
  %elif (m4 == 8)
    %assign m4 40
  %endif
%endrep

; 256 dword entries built like nfhpk_mov8, but each byte steps through
; 0c3h, 0c2h, 0c1h, 0c5h (source register ebx, edx, ecx, ebp).
; NOTE(review): these are register-to-register ModRM bytes with eax/ax in the
; reg field, so they only mean "mov (e)ax, <reg>" after an 8Bh opcode.  Confirm
; that the assembler in use emits 8B /r (not 89 /r) for the patched
; "mov ax, bx" instructions in nf9.
nfhpk_mov4
; 4x2 (patch +2)
; mov ax, bx/dx/cx/bp
; low 4x2 in 8x2 (patch +1)
; mov eax, ebx/edx/ecx/ebp
%assign m4 0c3h
%rep 4
  %assign m3 0c3h
  %rep 4
    %assign m2 0c3h
    %rep 4
      %assign m1 0c3h
      %rep 4
	db	m1,m2,m3,m4
        %if (m1 == 0c3h)
          %assign m1 0c2h
        %elif (m1 == 0c2h)
          %assign m1 0c1h
        %elif (m1 == 0c1h)
          %assign m1 0c5h
        %endif
      %endrep
      %if (m2 == 0c3h)
        %assign m2 0c2h
      %elif (m2 == 0c2h)
        %assign m2 0c1h
      %elif (m2 == 0c1h)
        %assign m2 0c5h
      %endif
    %endrep
    %if (m3 == 0c3h)
      %assign m3 0c2h
    %elif (m3 == 0c2h)
      %assign m3 0c1h
    %elif (m3 == 0c1h)
      %assign m3 0c5h
    %endif
  %endrep
  %if (m4 == 0c3h)
    %assign m4 0c2h
  %elif (m4 == 0c2h)
    %assign m4 0c1h
  %elif (m4 == 0c1h)
    %assign m4 0c5h
  %endif
%endrep

SEGMENT .text

; _asm_selfModify
; Stores the addresses of nine patched-code labels of _asm_nfHPkDecomp into
; global_unlock_memory_pointers[0..8] (dword entries).  Nothing is unlocked
; here; presumably the caller uses the addresses to make that code writable
; -- confirm against the C side.
; Clobbers eax.  Flags are saved and restored around the body.
_asm_selfModify:
	pushf
	; Move the pointer to the start of the pointer array into eax
	mov	eax, global_unlock_memory_pointers	; address of the array, not its contents
	; Load unlock addresses from _asm_nfHPkDecomp
	mov	dword [eax], nf7_0
	mov	dword [eax + 4], nf8_0
	mov	dword [eax + 8], nf9_0
	mov	dword [eax + 12], nf10_0
	mov	dword [eax + 16], nf23_0
	mov	dword [eax + 20], nf24_0
	mov	dword [eax + 24], nf25_0
	mov	dword [eax + 28], nf26_0
	mov	dword [eax + 32], nf42_0
	popf
	ret

;--------------------------------------------------------------------
; Sound Management
;--------------------

;unsigned sndDecompM16(unsigned short *dst, const unsigned char *src,
;		       unsigned len, unsigned prev);
;
;Decompresses a mono stream containing len samples
;(src is len bytes, dst is len*2 bytes)
;prev is the previous
;decompression state or zero.
;Returns new decompression state.
;
; In this port the arguments are not taken from the stack: they are read from
; the globals sndDecompM16_dst/_src/_len/_prev and the result is stored in
; sndDecompM16_return.
; Each output sample is the running 16-bit value plus snd_8to16[source byte].
; The addition is done on ax only, so it wraps at 16 bits and the upper half
; of eax keeps the upper half of prev.
; Clobbers eax, ebx, ecx, esi, edi and flags; nothing is saved or restored
; here (the USES list below is a leftover of the original PROC header).
;
_asm_sndDecompM16: ; PROC USES ESI EDI EBX, \
;		dst:PTRWORD, src:PTRBYTE, len:DWORD, prev:DWORD
	mov	eax, [sndDecompM16_prev]
	mov	ecx, [sndDecompM16_len]
	jecxz	done			; len == 0: state is returned unchanged
	mov	esi, [sndDecompM16_src]
	mov	edi, [sndDecompM16_dst]
	xor	ebx, ebx		; upper 24 bits stay zero so bl can index the table
lp:	mov	bl, byte [esi]
	add	esi, 1
	add	ax, word [snd_8to16 + ebx*2]
	mov	word [edi], ax
	add	edi, 2
	dec	ecx
	jnz	lp
done:
	; Store our return value - AH
	mov	dword [sndDecompM16_return], eax
	ret
;sndDecompM16 ENDP

;unsigned sndDecompS16(unsigned short *dst, const unsigned char *src,
;		       unsigned len, unsigned prev);
;
;Decompresses a stereo stream containing len samples
;(src is len*2 bytes, dst is len*4 bytes)
;prev is the previous decompression state or zero
;  (It encodes the 16-bit states of the two stereo channels
;   in its low and high order 16-bit halves.)
;Returns new decompression state.
;
; Arguments come from the globals sndDecompS16_dst/_src/_len/_prev and the
; result is stored in sndDecompS16_return.  Every loop iteration consumes two
; source bytes and writes two 16-bit samples: the first uses the state held
; in ax (low half of prev), the second the state held in dx (high half).
; Clobbers eax, ebx, ecx, edx, esi, edi and flags; nothing is saved here.
;
_asm_sndDecompS16: ; PROC USES ESI EDI EBX, \
;		dst:PTRWORD, src:PTRBYTE, len:DWORD, prev:DWORD
	movzx	eax, word [sndDecompS16_prev]
	movzx	edx, word [sndDecompS16_prev+2]
	mov	ecx, [sndDecompS16_len]
	jecxz	Sdone			; len == 0: state is re-packed unchanged
	mov	esi, [sndDecompS16_src]
	mov	edi, [sndDecompS16_dst]
	xor	ebx, ebx		; upper 24 bits stay zero so bl can index the table
Slp:	mov	bl, byte [esi]
	add	esi, 1
	add	ax, word [snd_8to16 + ebx*2]
	mov	word [edi], ax
	add	edi, 2
	mov	bl, byte [esi]
	add	esi, 1
	add	dx, word [snd_8to16 + ebx*2]
	mov	word [edi], dx
	add	edi, 2
	dec	ecx
	jnz	Slp
Sdone:	shl	edx, 16			; re-pack: dx state into the high half
	or	eax, edx
	; Store our return value - AH
	mov	dword [sndDecompS16_return], eax
	ret
;sndDecompS16 ENDP

;--------------------------------------------------------------------
; NextFrame (Video Decompression)
;----------------------------------

; NF_DECOMP_INIT HI_COLOR_FLAG
;
; Common prologue for the frame decompressor.  HI_COLOR_FLAG (%1) is added to
; the shift count used for the horizontal values, i.e. 1 doubles them.
; Reads:   nf_buf_prv, nf_buf_cur, nf_fqty, nf_new_row0, nf_width and
;          nfHPkDecomp_x/_y/_w/_h.
; Writes:  DiffBufPtrs, nf_new_x, nf_new_w, nf_new_y, nf_new_h, new_row, tbuf.
; Clobbers eax, edx (by mul) and flags; ebx is left holding nf_fqty.
;
; The two branch targets are macro-local (%%) labels so that the macro can be
; expanded more than once without defining duplicate symbols.
%macro NF_DECOMP_INIT 1 ;HI_COLOR_FLAG: REQ

	mov	eax, [nf_buf_prv]	;br	; DiffBufPtrs = nf_buf_prv - nf_buf_cur
	sub	eax, [nf_buf_cur]	;br
	mov	[DiffBufPtrs], eax

	xor	ebx, ebx		; ebx = nf_fqty (convert to 32-bits)
	mov	bl, [nf_fqty]

	mov	eax, [nfHPkDecomp_x]	;br	; nf_new_x = x*SWIDTH*2^HI_COLOR_FLAG;
	shl	eax, LOG2_SWIDTH+%1	;HI_COLOR_FLAG
	mov	[nf_new_x], eax
	mov	eax, [nfHPkDecomp_w]	;br	; nf_new_w = w*SWIDTH*2^HI_COLOR_FLAG;
	shl	eax, LOG2_SWIDTH+%1	;HI_COLOR_FLAG
	mov	[nf_new_w], eax

	mov	eax, [nfHPkDecomp_y]	;br	; nf_new_y = y*nf_fqty*SHEIGHT;
	shl	eax, LOG2_SHEIGHT
	mul	ebx			;nf_fqty
	mov	[nf_new_y], eax
	mov	eax, [nfHPkDecomp_h]	;br	; nf_new_h = h*nf_fqty*SHEIGHT;
	shl	eax, LOG2_SHEIGHT
	mul	ebx			;nf_fqty
	mov	[nf_new_h], eax

	mov	eax, [nf_new_row0]	;br	; new_row = nf_new_row0 - nf_new_w;
	sub	eax, [nf_new_w]		;br
	mov	[new_row], eax

	;; Move to correct place in current buffer
	mov	eax, [nf_buf_cur]	;br	; tbuf = nf_buf_cur
	mov	[tbuf], eax
;	%if (nfHPkDecomp_x || nfHPkDecomp_y)		; if (x||y)
	; cmp, jnz, jz and labels are mine - AH
	cmp	dword [nfHPkDecomp_x], 0
	jne	%%before
	cmp	dword [nfHPkDecomp_y], 0
	je	%%after
%%before:
	mov	eax, [nf_new_y]		;br	; tbuf += nf_new_y*nf_width + nf_new_x;
	mul	dword [nf_width]	; Added dword - AH
	add	eax, [nf_new_x]		;br
	add	[tbuf], eax
%%after:
;	%endif

%endmacro ; DECOMP_INIT

;----------------------------------------------------------------------
; nfPkConfig initializes tables used by nfPkDecomp
; which are dependent on screen size.
_asm_nfPkConfig: ; PROC USES ESI EDI EBX
; Fills the 256 dword entries of nfpk_ShiftY from nf_width:
;   entries   0..127 = index * nf_width
;   entries 128..255 = (index - 256) * nf_width
; so that indexing with a byte yields a signed row offset.
; Takes no arguments.  Clobbers eax, ebx, ecx, edi and flags; nothing is
; saved here (the USES list above is a leftover of the original PROC header).

	; Build ShiftY table
	;
	lea	edi, [nfpk_ShiftY]
	mov	ebx, [nf_width]

	; First half: 0, width, 2*width, ...
	mov	eax, 0
	mov	ecx, 128
lp1:	mov	[edi], eax
	add	edi,4
	add	eax,ebx
	dec	ecx
	jne	lp1

	; Second half: start at -128*width and keep stepping by +width
	mov	eax, ebx
	shl	eax, 7
	neg	eax
	mov	ecx, 128
lp2:	mov	[edi], eax
	add	edi,4
	add	eax,ebx
	dec	ecx
	jne	lp2

	ret
;nfPkConfig ENDP

; Trans16_3 dst, idx, mask
; Loads into dst the OR of nf_trans16_lo[byte at idx] and
; nf_trans16_hi[byte at idx+1] (word-sized table entries when dst is a
; 16-bit register, as in the visible uses).  Clobbers eax.
; The third argument is accepted but not referenced in the body.
%macro Trans16_3 3 ; dst:req, idx:req, mask
	xor	eax, eax
	mov	al, [%2]
	mov	%1, [nf_trans16_lo + eax*2]
	xor	eax, eax
	mov	al, [%2+1]
	or	%1, [nf_trans16_hi + eax*2]
%endmacro

; Trans16 dst, idx
; Same body as Trans16_3 without the extra argument.  Clobbers eax.
%macro Trans16 2 ; dst:req, idx:req
	xor	eax, eax
	mov	al, [%2]
	mov	%1, [nf_trans16_lo + eax*2]
	xor	eax, eax
	mov	al, [%2+1]
	or	%1, [nf_trans16_hi + eax*2]
%endmacro

_asm_nfHPkDecomp: ; PROC USES ESI EDI EBX, \
;		ops:PTRBYTE, comp:PTRBYTE, \
;		x:DWORD, y:DWORD, w:DWORD, h:DWORD
; Arguments are read from the nfHPkDecomp_* globals.

	NF_DECOMP_INIT 1

	mov	eax, [nf_back_right]	;br
	sub	eax, SWIDTH*2
	mov	[nfpk_back_right], eax

	mov	esi, [nfHPkDecomp_comp]	;br
	mov	edi, [tbuf]

	; The first word of comp is an offset: bcomp = comp + that word,
	; while esi continues right after the word.
	xor	eax, eax
	mov	ax, [esi]
	add	eax, esi
	mov	[bcomp], eax
	add	esi, 2

nf_StartRow:
	mov	eax, [nfHPkDecomp_w]	;br
	shr	eax, 1			; one opcode byte covers two sections
	mov	[wcnt],eax
	ALIGN 4
nf_NextPair:
	dec	dword [wcnt]		; Added dword - AH
	js	nf_NextRow
	mov	ebx, [nfHPkDecomp_ops]	; br
	mov	al, [ebx]
	inc	ebx
	mov	[nfHPkDecomp_ops], ebx

	; Split the opcode byte into its two nibbles.  The low-nibble handler is
	; entered by jmp; its retn pops the high-nibble handler pushed below,
	; whose retn in turn pops nf_NextPair.
	xor	ebx, ebx
	mov	bl, al
	shr	bl, 4
	and	eax, 0Fh
	push	dword nf_NextPair	; Added dword on these two - AH
	push	dword [nfhpk_OpTbl + ebx*4]
	jmp	[nfhpk_OpTbl + eax*4]

nf_NextRow:
	add	edi, [new_row]		;br
	dec	dword [nfHPkDecomp_h]	; Added dword - AH
	jnz	nf_StartRow
	ret

;----------------------------------------
	ALIGN 4
nf0:	; No change from previous buffer
	mov	eax, [DiffBufPtrs]	; br
	jmp	nf_shift

;----------------------------------------
	ALIGN 4
nf1:	; No change (and copied to screen)
	add	edi, SWIDTH*2
	retn

;----------------------------------------
	ALIGN 4
nf2:	; Near shift from older part of current buffer
	xor	eax, eax
	mov	ebx, [bcomp]		; br
	inc	dword [bcomp]		; Added dword - AH
	mov	al, [ebx]
	mov	ax, [nfpk_ShiftP2 + eax*2]
nf_xyc_shift:
	; al = x, ah = y (signed bytes).  ebx = y as a table index,
	; eax = sign-extended x times 2.
	xor	ebx, ebx
	mov	bl, ah
	shl	eax, 24
	sar	eax, 24-1
	add	eax, [nfpk_ShiftY +
ebx*4] jmp nf_shift ;---------------------------------------- ALIGN 4 nf3: ; Near shift from newer part of current buffer xor eax, eax mov ebx, [bcomp] ; br inc dword [bcomp] ; Added dword - AH mov al, [ebx] mov ax, [nfpk_ShiftP2 + eax*2] neg al neg ah jmp nf_xyc_shift ;---------------------------------------- ALIGN 4 nf4: ; Near shift from previous buffer xor eax, eax mov ebx, [bcomp] ; br inc dword [bcomp] ; Added dword - AH mov al, [ebx] mov ax, [nfpk_ShiftP1 + eax*2] jmp nf_xyp_shift ;---------------------------------------- ALIGN 4 nf5: ; Far shift from previous buffer mov ax, [esi] add esi, 2 nf_xyp_shift: xor ebx, ebx mov bl, ah shl eax, 24 sar eax, 24-1 add eax, [nfpk_ShiftY + ebx*4] add eax, [DiffBufPtrs] ; br jmp nf_shift ;---------------------------------------- ALIGN 4 nf6: ; Far shift from current buffer mov ax, [esi] add esi, 2 jmp nf_xyc_shift ;---------------------------------------- ALIGN 4 nf_shift: mov ebx, esi ; save esi lea esi, [edi+eax] mov edx, [nf_width] %rep 7 mov eax, [esi] mov [edi], eax mov eax, [esi+4] mov [edi+4], eax mov eax, [esi+8] mov [edi+8], eax mov eax, [esi+12] mov [edi+12], eax add esi, edx add edi, edx %endrep mov eax, [esi] mov [edi], eax mov eax, [esi+4] mov [edi+4], eax mov eax, [esi+8] mov [edi+8], eax mov eax, [esi+12] mov [edi+12], eax sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8 mov esi, ebx ; restore esi retn ;---------------------------------------- ALIGN 4 nf7: ; 8x8x1 (12 bytes) test word [esi], 08000h jnz near nf23 xor eax, eax lea ecx, [nfhpk_mov8] lea edx, [nf7_11+1] ; Removed byte ds:- AH mov al, [esi+4] mov ebx, [ecx+eax*4] mov [edx+(nf7_11-nf7_11)], bl mov [edx+(nf7_12-nf7_11)], bh shr ebx, 16 mov [edx+(nf7_13-nf7_11)], bl mov [edx+(nf7_14-nf7_11)], bh mov al, [esi+5] mov ebx, [ecx+eax*4] mov [edx+(nf7_21-nf7_11)], bl mov [edx+(nf7_22-nf7_11)], bh shr ebx, 16 mov [edx+(nf7_23-nf7_11)], bl mov [edx+(nf7_24-nf7_11)], bh mov al, [esi+6] mov ebx, [ecx+eax*4] mov [edx+(nf7_31-nf7_11)], bl mov 
[edx+(nf7_32-nf7_11)], bh shr ebx, 16 mov [edx+(nf7_33-nf7_11)], bl mov [edx+(nf7_34-nf7_11)], bh mov al, [esi+7] mov ebx, [ecx+eax*4] mov [edx+(nf7_41-nf7_11)], bl mov [edx+(nf7_42-nf7_11)], bh shr ebx, 16 mov [edx+(nf7_43-nf7_11)], bl mov [edx+(nf7_44-nf7_11)], bh lea edx, [edx+(nf7_51-nf7_11)] mov al, [esi+8] mov ebx, [ecx+eax*4] mov [edx+(nf7_51-nf7_51)], bl mov [edx+(nf7_52-nf7_51)], bh shr ebx, 16 mov [edx+(nf7_53-nf7_51)], bl mov [edx+(nf7_54-nf7_51)], bh mov al, [esi+9] mov ebx, [ecx+eax*4] mov [edx+(nf7_61-nf7_51)], bl mov [edx+(nf7_62-nf7_51)], bh shr ebx, 16 mov [edx+(nf7_63-nf7_51)], bl mov [edx+(nf7_64-nf7_51)], bh mov al, [esi+10] mov ebx, [ecx+eax*4] mov [edx+(nf7_71-nf7_51)], bl mov [edx+(nf7_72-nf7_51)], bh shr ebx, 16 mov [edx+(nf7_73-nf7_51)], bl mov [edx+(nf7_74-nf7_51)], bh mov al, [esi+11] mov ebx, [ecx+eax*4] mov [edx+(nf7_81-nf7_51)], bl mov [edx+(nf7_82-nf7_51)], bh shr ebx, 16 mov [edx+(nf7_83-nf7_51)], bl mov [edx+(nf7_84-nf7_51)], bh push ebp push esi ; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations ; (note that bits are read least significant first). 
Trans16 cx, esi+2 shl ecx, 16 Trans16 cx, esi mov esi,[nf_width] mov edx, ecx ror edx, 16 mov ebx, edx mov bx, cx mov ebp, ecx mov bp, dx jmp nf7_0 ; flush prefetch ALIGN 4 nf7_0: nf7_11: mov [ebp+0], ebx nf7_12: mov [ebp+4], ebx nf7_13: mov [ebp+8], ebx nf7_14: mov [ebp+12], ebx add edi, esi nf7_21: mov [ebp+0], ebx nf7_22: mov [ebp+4], ebx nf7_23: mov [ebp+8], ebx nf7_24: mov [ebp+12], ebx add edi, esi nf7_31: mov [ebp+0], ebx nf7_32: mov [ebp+4], ebx nf7_33: mov [ebp+8], ebx nf7_34: mov [ebp+12], ebx add edi, esi nf7_41: mov [ebp+0], ebx nf7_42: mov [ebp+4], ebx nf7_43: mov [ebp+8], ebx nf7_44: mov [ebp+12], ebx add edi, esi nf7_51: mov [ebp+0], ebx nf7_52: mov [ebp+4], ebx nf7_53: mov [ebp+8], ebx nf7_54: mov [ebp+12], ebx add edi, esi nf7_61: mov [ebp+0], ebx nf7_62: mov [ebp+4], ebx nf7_63: mov [ebp+8], ebx nf7_64: mov [ebp+12], ebx add edi, esi nf7_71: mov [ebp+0], ebx nf7_72: mov [ebp+4], ebx nf7_73: mov [ebp+8], ebx nf7_74: mov [ebp+12], ebx add edi, esi nf7_81: mov [ebp+0], ebx nf7_82: mov [ebp+4], ebx nf7_83: mov [ebp+8], ebx nf7_84: mov [ebp+12], ebx pop esi pop ebp add esi, 12 sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8 retn ;---------------------------------------- ALIGN 4 ;nf7+16 nf23: ; low 4x4x1 (6 bytes) xor eax, eax lea ecx, [nfhpk_mov4l] lea edx, [nf23_11+1] ; Removed byte ds: - AH mov al, [esi+4] and al, 0fH mov ebx, [ecx+eax*4] mov [edx+(nf23_11-nf23_11)], bl mov [edx+(nf23_12-nf23_11)], bh shr ebx, 16 mov [edx+(nf23_13-nf23_11)], bl mov [edx+(nf23_14-nf23_11)], bh mov al, [esi+4] shr al, 4 mov ebx, [ecx+eax*4] mov [edx+(nf23_31-nf23_11)], bl mov [edx+(nf23_32-nf23_11)], bh shr ebx, 16 mov [edx+(nf23_33-nf23_11)], bl mov [edx+(nf23_34-nf23_11)], bh mov al, [esi+5] and al, 0fH mov ebx, [ecx+eax*4] mov [edx+(nf23_51-nf23_11)], bl mov [edx+(nf23_52-nf23_11)], bh shr ebx, 16 mov [edx+(nf23_53-nf23_11)], bl mov [edx+(nf23_54-nf23_11)], bh mov al, [esi+5] shr al, 4 mov ebx, [ecx+eax*4] mov [edx+(nf23_71-nf23_11)], bl mov 
[edx+(nf23_72-nf23_11)], bh shr ebx, 16 mov [edx+(nf23_73-nf23_11)], bl mov [edx+(nf23_74-nf23_11)], bh mov edx, [nf_width] ; load ebx,ecx with 00,11 color combinations Trans16_3 cx, esi, 1 shrd ebx, ecx, 16 mov bx, cx Trans16 cx, esi+2 shrd eax, ecx, 16 mov ax, cx mov ecx, eax jmp nf23_0 ; flush prefetch ALIGN 4 nf23_0: nf23_11:mov eax, ebx mov [edi], eax mov [edi+edx], eax nf23_12:mov eax, ebx mov [edi+4], eax mov [edi+edx+4], eax nf23_13:mov eax, ebx mov [edi+8], eax mov [edi+edx+8], eax nf23_14:mov eax, ebx mov [edi+12], eax mov [edi+edx+12], eax lea edi, [edi+edx*2] nf23_31:mov eax, ebx mov [edi], eax mov [edi+edx], eax nf23_32:mov eax, ebx mov [edi+4], eax mov [edi+edx+4], eax nf23_33:mov eax, ebx mov [edi+8], eax mov [edi+edx+8], eax nf23_34:mov eax, ebx mov [edi+12], eax mov [edi+edx+12], eax lea edi, [edi+edx*2] nf23_51:mov eax, ebx mov [edi], eax mov [edi+edx], eax nf23_52:mov eax, ebx mov [edi+4], eax mov [edi+edx+4], eax nf23_53:mov eax, ebx mov [edi+8], eax mov [edi+edx+8], eax nf23_54:mov eax, ebx mov [edi+12], eax mov [edi+edx+12], eax lea edi, [edi+edx*2] nf23_71:mov eax, ebx mov [edi], eax mov [edi+edx], eax nf23_72:mov eax, ebx mov [edi+4], eax mov [edi+edx+4], eax nf23_73:mov eax, ebx mov [edi+8], eax mov [edi+edx+8], eax nf23_74:mov eax, ebx mov [edi+12], eax mov [edi+edx+12], eax add edi, edx sub edi, [nfpk_back_right] add esi, 6 retn ;---------------------------------------- ALIGN 4 nf8: ; 2x2 4x4x1 (24 bytes) test word [esi], 08000h jnz near nf24 xor eax, eax lea ecx, [nfhpk_mov8] lea edx, [nf8_11+1] ; Removed byte ds: - AH mov al, [esi+4] mov ebx, [ecx+eax*4] mov [edx+(nf8_11-nf8_11)], bl mov [edx+(nf8_12-nf8_11)], bh shr ebx, 16 mov [edx+(nf8_13-nf8_11)], bl mov [edx+(nf8_14-nf8_11)], bh mov al, [esi+5] mov ebx, [ecx+eax*4] mov [edx+(nf8_21-nf8_11)], bl mov [edx+(nf8_22-nf8_11)], bh shr ebx, 16 mov [edx+(nf8_23-nf8_11)], bl mov [edx+(nf8_24-nf8_11)], bh mov al, [esi+10] mov ebx, [ecx+eax*4] mov [edx+(nf8_31-nf8_11)], bl mov 
[edx+(nf8_32-nf8_11)], bh shr ebx, 16 mov [edx+(nf8_33-nf8_11)], bl mov [edx+(nf8_34-nf8_11)], bh mov al, [esi+11] mov ebx, [ecx+eax*4] mov [edx+(nf8_41-nf8_11)], bl mov [edx+(nf8_42-nf8_11)], bh shr ebx, 16 mov [edx+(nf8_43-nf8_11)], bl mov [edx+(nf8_44-nf8_11)], bh add edx, nf8_51-nf8_11 mov al, [esi+16] mov ebx, [ecx+eax*4] mov [edx+(nf8_51-nf8_51)], bl mov [edx+(nf8_52-nf8_51)], bh shr ebx, 16 mov [edx+(nf8_53-nf8_51)], bl mov [edx+(nf8_54-nf8_51)], bh mov al, [esi+17] mov ebx, [ecx+eax*4] mov [edx+(nf8_61-nf8_51)], bl mov [edx+(nf8_62-nf8_51)], bh shr ebx, 16 mov [edx+(nf8_63-nf8_51)], bl mov [edx+(nf8_64-nf8_51)], bh mov al, [esi+22] mov ebx, [ecx+eax*4] mov [edx+(nf8_71-nf8_51)], bl mov [edx+(nf8_72-nf8_51)], bh shr ebx, 16 mov [edx+(nf8_73-nf8_51)], bl mov [edx+(nf8_74-nf8_51)], bh mov al, [esi+23] mov ebx, [ecx+eax*4] mov [edx+(nf8_81-nf8_51)], bl mov [edx+(nf8_82-nf8_51)], bh shr ebx, 16 mov [edx+(nf8_83-nf8_51)], bl mov [edx+(nf8_84-nf8_51)], bh push ebp push esi ; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations ; (note that bits are read least significant first). 
Trans16 cx, esi+18+2 shl ecx, 16 Trans16 cx, esi+18 push ecx Trans16 cx, esi+12+2 shl ecx, 16 Trans16 cx, esi+12 push ecx Trans16 cx, esi+6+2 shl ecx, 16 Trans16 cx, esi+6 push ecx Trans16 cx, esi+2 shl ecx, 16 Trans16 cx, esi mov esi,[nf_width] mov edx, ecx ror edx, 16 mov ebx, edx mov bx, cx mov ebp, ecx mov bp, dx jmp nf8_0 ; flush prefetch ALIGN 4 nf8_0: nf8_11: mov [ebp+0], ebx nf8_12: mov [ebp+4], ebx add edi, esi nf8_13: mov [ebp+0], ebx nf8_14: mov [ebp+4], ebx add edi, esi nf8_21: mov [ebp+0], ebx nf8_22: mov [ebp+4], ebx add edi, esi nf8_23: mov [ebp+0], ebx nf8_24: mov [ebp+4], ebx add edi, esi pop ecx mov edx, ecx ror edx, 16 mov ebx, edx mov bx, cx mov ebp, ecx mov bp, dx nf8_31: mov [ebp+0], ebx nf8_32: mov [ebp+4], ebx add edi, esi nf8_33: mov [ebp+0], ebx nf8_34: mov [ebp+4], ebx add edi, esi nf8_41: mov [ebp+0], ebx nf8_42: mov [ebp+4], ebx add edi, esi nf8_43: mov [ebp+0], ebx nf8_44: mov [ebp+4], ebx add edi, esi lea eax, [esi*8-8] sub edi, eax pop ecx mov edx, ecx ror edx, 16 mov ebx, edx mov bx, cx mov ebp, ecx mov bp, dx nf8_51: mov [ebp+0], ebx nf8_52: mov [ebp+4], ebx add edi, esi nf8_53: mov [ebp+0], ebx nf8_54: mov [ebp+4], ebx add edi, esi nf8_61: mov [ebp+0], ebx nf8_62: mov [ebp+4], ebx add edi, esi nf8_63: mov [ebp+0], ebx nf8_64: mov [ebp+4], ebx add edi, esi pop ecx mov edx, ecx ror edx, 16 mov ebx, edx mov bx, cx mov ebp, ecx mov bp, dx nf8_71: mov [ebp+0], ebx nf8_72: mov [ebp+4], ebx add edi, esi nf8_73: mov [ebp+0], ebx nf8_74: mov [ebp+4], ebx add edi, esi nf8_81: mov [ebp+0], ebx nf8_82: mov [ebp+4], ebx add edi, esi nf8_83: mov [ebp+0], ebx nf8_84: mov [ebp+4], ebx pop esi pop ebp add esi, 24 sub edi, 8 sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8 retn ;---------------------------------------- ALIGN 4 ;nf8+16 nf24: ; 2x1 4x8x1 (16 bytes) test word [esi+8], 08000h jnz near nf40 xor eax, eax lea ecx, [nfhpk_mov8] lea edx, [nf24_11+1] ; Removed byte ds: - AH mov al, [esi+4] mov ebx, [ecx+eax*4] mov 
[edx+(nf24_11-nf24_11)], bl mov [edx+(nf24_12-nf24_11)], bh shr ebx, 16 mov [edx+(nf24_13-nf24_11)], bl mov [edx+(nf24_14-nf24_11)], bh mov al, [esi+5] mov ebx, [ecx+eax*4] mov [edx+(nf24_21-nf24_11)], bl mov [edx+(nf24_22-nf24_11)], bh shr ebx, 16 mov [edx+(nf24_23-nf24_11)], bl mov [edx+(nf24_24-nf24_11)], bh mov al, [esi+6] mov ebx, [ecx+eax*4] mov [edx+(nf24_31-nf24_11)], bl mov [edx+(nf24_32-nf24_11)], bh shr ebx, 16 mov [edx+(nf24_33-nf24_11)], bl mov [edx+(nf24_34-nf24_11)], bh mov al, [esi+7] mov ebx, [ecx+eax*4] mov [edx+(nf24_41-nf24_11)], bl mov [edx+(nf24_42-nf24_11)], bh shr ebx, 16 mov [edx+(nf24_43-nf24_11)], bl mov [edx+(nf24_44-nf24_11)], bh add edx, nf24_51-nf24_11 mov al, [esi+12] mov ebx, [ecx+eax*4] mov [edx+(nf24_51-nf24_51)], bl mov [edx+(nf24_52-nf24_51)], bh shr ebx, 16 mov [edx+(nf24_53-nf24_51)], bl mov [edx+(nf24_54-nf24_51)], bh mov al, [esi+13] mov ebx, [ecx+eax*4] mov [edx+(nf24_61-nf24_51)], bl mov [edx+(nf24_62-nf24_51)], bh shr ebx, 16 mov [edx+(nf24_63-nf24_51)], bl mov [edx+(nf24_64-nf24_51)], bh mov al, [esi+14] mov ebx, [ecx+eax*4] mov [edx+(nf24_71-nf24_51)], bl mov [edx+(nf24_72-nf24_51)], bh shr ebx, 16 mov [edx+(nf24_73-nf24_51)], bl mov [edx+(nf24_74-nf24_51)], bh mov al, [esi+15] mov ebx, [ecx+eax*4] mov [edx+(nf24_81-nf24_51)], bl mov [edx+(nf24_82-nf24_51)], bh shr ebx, 16 mov [edx+(nf24_83-nf24_51)], bl mov [edx+(nf24_84-nf24_51)], bh push ebp push esi ; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations ; (note that bits are read least significant first). 
Trans16 cx, esi+8+2 shl ecx, 16 Trans16 cx, esi+8 push ecx Trans16 cx, esi+2 shl ecx, 16 Trans16_3 cx, esi, 1 mov esi,[nf_width] mov edx, ecx ror edx, 16 mov ebx, edx mov bx, cx mov ebp, ecx mov bp, dx jmp nf24_0 ; flush prefetch ALIGN 4 nf24_0: nf24_11:mov [ebp+0], ebx nf24_12:mov [ebp+4], ebx add edi, esi nf24_13:mov [ebp+0], ebx nf24_14:mov [ebp+4], ebx add edi, esi nf24_21:mov [ebp+0], ebx nf24_22:mov [ebp+4], ebx add edi, esi nf24_23:mov [ebp+0], ebx nf24_24:mov [ebp+4], ebx add edi, esi nf24_31:mov [ebp+0], ebx nf24_32:mov [ebp+4], ebx add edi, esi nf24_33:mov [ebp+0], ebx nf24_34:mov [ebp+4], ebx add edi, esi nf24_41:mov [ebp+0], ebx nf24_42:mov [ebp+4], ebx add edi, esi nf24_43:mov [ebp+0], ebx nf24_44:mov [ebp+4], ebx add edi, esi lea eax, [esi*8-8] sub edi, eax pop ecx mov edx, ecx ror edx, 16 mov ebx, edx mov bx, cx mov ebp, ecx mov bp, dx nf24_51:mov [ebp+0], ebx nf24_52:mov [ebp+4], ebx add edi, esi nf24_53:mov [ebp+0], ebx nf24_54:mov [ebp+4], ebx add edi, esi nf24_61:mov [ebp+0], ebx nf24_62:mov [ebp+4], ebx add edi, esi nf24_63:mov [ebp+0], ebx nf24_64:mov [ebp+4], ebx add edi, esi nf24_71:mov [ebp+0], ebx nf24_72:mov [ebp+4], ebx add edi, esi nf24_73:mov [ebp+0], ebx nf24_74:mov [ebp+4], ebx add edi, esi nf24_81:mov [ebp+0], ebx nf24_82:mov [ebp+4], ebx add edi, esi nf24_83:mov [ebp+0], ebx nf24_84:mov [ebp+4], ebx pop esi pop ebp add esi, 16 sub edi, 8 sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8 retn ;---------------------------------------- ALIGN 4 ;nf8+32 nf40: ; 1x2 8x4x1 (16 bytes) xor eax, eax lea ecx, [nfhpk_mov8] lea edx, [nf40_11+1] ; Removed byte ds: - AH mov al, [esi+4] mov ebx, [ecx+eax*4] mov [edx+(nf40_11-nf40_11)], bl mov [edx+(nf40_12-nf40_11)], bh shr ebx, 16 mov [edx+(nf40_13-nf40_11)], bl mov [edx+(nf40_14-nf40_11)], bh mov al, [esi+5] mov ebx, [ecx+eax*4] mov [edx+(nf40_21-nf40_11)], bl mov [edx+(nf40_22-nf40_11)], bh shr ebx, 16 mov [edx+(nf40_23-nf40_11)], bl mov [edx+(nf40_24-nf40_11)], bh mov al, [esi+6] mov ebx, 
[ecx+eax*4] mov [edx+(nf40_31-nf40_11)], bl mov [edx+(nf40_32-nf40_11)], bh shr ebx, 16 mov [edx+(nf40_33-nf40_11)], bl mov [edx+(nf40_34-nf40_11)], bh mov al, [esi+7] mov ebx, [ecx+eax*4] mov [edx+(nf40_41-nf40_11)], bl mov [edx+(nf40_42-nf40_11)], bh shr ebx, 16 mov [edx+(nf40_43-nf40_11)], bl mov [edx+(nf40_44-nf40_11)], bh add edx, nf40_51-nf40_11 mov al, [esi+12] mov ebx, [ecx+eax*4] mov [edx+(nf40_51-nf40_51)], bl mov [edx+(nf40_52-nf40_51)], bh shr ebx, 16 mov [edx+(nf40_53-nf40_51)], bl mov [edx+(nf40_54-nf40_51)], bh mov al, [esi+13] mov ebx, [ecx+eax*4] mov [edx+(nf40_61-nf40_51)], bl mov [edx+(nf40_62-nf40_51)], bh shr ebx, 16 mov [edx+(nf40_63-nf40_51)], bl mov [edx+(nf40_64-nf40_51)], bh mov al, [esi+14] mov ebx, [ecx+eax*4] mov [edx+(nf40_71-nf40_51)], bl mov [edx+(nf40_72-nf40_51)], bh shr ebx, 16 mov [edx+(nf40_73-nf40_51)], bl mov [edx+(nf40_74-nf40_51)], bh mov al, [esi+15] mov ebx, [ecx+eax*4] mov [edx+(nf40_81-nf40_51)], bl mov [edx+(nf40_82-nf40_51)], bh shr ebx, 16 mov [edx+(nf40_83-nf40_51)], bl mov [edx+(nf40_84-nf40_51)], bh push ebp push esi ; load ebx,edx,ecx,ebp with 00,01,10,11 color combinations ; (note that bits are read least significant first). 
Trans16 cx, esi+8+2 shl ecx, 16 Trans16_3 cx, esi+8, 1 push ecx Trans16 cx, esi+2 shl ecx, 16 Trans16_3 cx, esi, 1 mov esi,[nf_width] mov edx, ecx ror edx, 16 mov ebx, edx mov bx, cx mov ebp, ecx mov bp, dx jmp nf40_0 ; flush prefetch ALIGN 4 nf40_0: nf40_11:mov [ebp+0], ebx nf40_12:mov [ebp+4], ebx nf40_13:mov [ebp+8], ebx nf40_14:mov [ebp+12], ebx add edi, esi nf40_21:mov [ebp+0], ebx nf40_22:mov [ebp+4], ebx nf40_23:mov [ebp+8], ebx nf40_24:mov [ebp+12], ebx add edi, esi nf40_31:mov [ebp+0], ebx nf40_32:mov [ebp+4], ebx nf40_33:mov [ebp+8], ebx nf40_34:mov [ebp+12], ebx add edi, esi nf40_41:mov [ebp+0], ebx nf40_42:mov [ebp+4], ebx nf40_43:mov [ebp+8], ebx nf40_44:mov [ebp+12], ebx add edi, esi pop ecx mov edx, ecx ror edx, 16 mov ebx, edx mov bx, cx mov ebp, ecx mov bp, dx nf40_51:mov [ebp+0], ebx nf40_52:mov [ebp+4], ebx nf40_53:mov [ebp+8], ebx nf40_54:mov [ebp+12], ebx add edi, esi nf40_61:mov [ebp+0], ebx nf40_62:mov [ebp+4], ebx nf40_63:mov [ebp+8], ebx nf40_64:mov [ebp+12], ebx add edi, esi nf40_71:mov [ebp+0], ebx nf40_72:mov [ebp+4], ebx nf40_73:mov [ebp+8], ebx nf40_74:mov [ebp+12], ebx add edi, esi nf40_81:mov [ebp+0], ebx nf40_82:mov [ebp+4], ebx nf40_83:mov [ebp+8], ebx nf40_84:mov [ebp+12], ebx pop esi pop ebp add esi, 16 sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8 retn ;---------------------------------------- ALIGN 4 nf9: ; 8x8x2 (24 bytes) test word [esi], 08000h jnz near nf41 test word [esi+4], 08000h jnz near nf25 xor eax, eax lea ecx, [nfhpk_mov4] lea edx, [nf9_11+2] ; Removed byte ds: - AH mov al, [esi+8] mov ebx, [ecx+eax*4] mov [edx+(nf9_11-nf9_11)], bh mov [edx+(nf9_12-nf9_11)], bl shr ebx, 16 mov [edx+(nf9_13-nf9_11)], bh mov [edx+(nf9_14-nf9_11)], bl mov al, [esi+9] mov ebx, [ecx+eax*4] mov [edx+(nf9_15-nf9_11)], bh mov [edx+(nf9_16-nf9_11)], bl shr ebx, 16 mov [edx+(nf9_17-nf9_11)], bh mov [edx+(nf9_18-nf9_11)], bl mov al, [esi+10] mov ebx, [ecx+eax*4] mov [edx+(nf9_21-nf9_11)], bh mov [edx+(nf9_22-nf9_11)], bl shr ebx, 16 mov 
[edx+(nf9_23-nf9_11)], bh mov [edx+(nf9_24-nf9_11)], bl mov al, [esi+11] mov ebx, [ecx+eax*4] mov [edx+(nf9_25-nf9_11)], bh mov [edx+(nf9_26-nf9_11)], bl shr ebx, 16 mov [edx+(nf9_27-nf9_11)], bh mov [edx+(nf9_28-nf9_11)], bl mov al, [esi+12] mov ebx, [ecx+eax*4] mov [edx+(nf9_31-nf9_11)], bh mov [edx+(nf9_32-nf9_11)], bl shr ebx, 16 mov [edx+(nf9_33-nf9_11)], bh mov [edx+(nf9_34-nf9_11)], bl mov al, [esi+13] mov ebx, [ecx+eax*4] mov [edx+(nf9_35-nf9_11)], bh mov [edx+(nf9_36-nf9_11)], bl shr ebx, 16 mov [edx+(nf9_37-nf9_11)], bh mov [edx+(nf9_38-nf9_11)], bl mov al, [esi+14] mov ebx, [ecx+eax*4] mov [edx+(nf9_41-nf9_11)], bh mov [edx+(nf9_42-nf9_11)], bl shr ebx, 16 mov [edx+(nf9_43-nf9_11)], bh mov [edx+(nf9_44-nf9_11)], bl mov al, [esi+15] mov ebx, [ecx+eax*4] mov [edx+(nf9_45-nf9_11)], bh mov [edx+(nf9_46-nf9_11)], bl shr ebx, 16 mov [edx+(nf9_47-nf9_11)], bh mov [edx+(nf9_48-nf9_11)], bl lea edx, [edx+(nf9_51-nf9_11)] mov al, [esi+16] mov ebx, [ecx+eax*4] mov [edx+(nf9_51-nf9_51)], bh mov [edx+(nf9_52-nf9_51)], bl shr ebx, 16 mov [edx+(nf9_53-nf9_51)], bh mov [edx+(nf9_54-nf9_51)], bl mov al, [esi+17] mov ebx, [ecx+eax*4] mov [edx+(nf9_55-nf9_51)], bh mov [edx+(nf9_56-nf9_51)], bl shr ebx, 16 mov [edx+(nf9_57-nf9_51)], bh mov [edx+(nf9_58-nf9_51)], bl mov al, [esi+18] mov ebx, [ecx+eax*4] mov [edx+(nf9_61-nf9_51)], bh mov [edx+(nf9_62-nf9_51)], bl shr ebx, 16 mov [edx+(nf9_63-nf9_51)], bh mov [edx+(nf9_64-nf9_51)], bl mov al, [esi+19] mov ebx, [ecx+eax*4] mov [edx+(nf9_65-nf9_51)], bh mov [edx+(nf9_66-nf9_51)], bl shr ebx, 16 mov [edx+(nf9_67-nf9_51)], bh mov [edx+(nf9_68-nf9_51)], bl mov al, [esi+20] mov ebx, [ecx+eax*4] mov [edx+(nf9_71-nf9_51)], bh mov [edx+(nf9_72-nf9_51)], bl shr ebx, 16 mov [edx+(nf9_73-nf9_51)], bh mov [edx+(nf9_74-nf9_51)], bl mov al, [esi+21] mov ebx, [ecx+eax*4] mov [edx+(nf9_75-nf9_51)], bh mov [edx+(nf9_76-nf9_51)], bl shr ebx, 16 mov [edx+(nf9_77-nf9_51)], bh mov [edx+(nf9_78-nf9_51)], bl mov al, [esi+22] mov ebx, [ecx+eax*4] mov 
[edx+(nf9_81-nf9_51)], bh mov [edx+(nf9_82-nf9_51)], bl shr ebx, 16 mov [edx+(nf9_83-nf9_51)], bh mov [edx+(nf9_84-nf9_51)], bl mov al, [esi+23] mov ebx, [ecx+eax*4] mov [edx+(nf9_85-nf9_51)], bh mov [edx+(nf9_86-nf9_51)], bl shr ebx, 16 mov [edx+(nf9_87-nf9_51)], bh mov [edx+(nf9_88-nf9_51)], bl push ebp push esi ; Load bx,dx,cx,bp with four colors Trans16 bx, esi Trans16 dx, esi+2 Trans16 cx, esi+4 Trans16 bp, esi+6 mov esi, [nf_width] jmp nf9_0 ; flush prefetch ALIGN 4 nf9_0: nf9_11: mov ax, bx shl eax, 16 nf9_12: mov ax, bx mov [edi], eax nf9_13: mov ax, bx shl eax, 16 nf9_14: mov ax, bx mov [edi+4], eax nf9_15: mov ax, bx shl eax, 16 nf9_16: mov ax, bx mov [edi+8], eax nf9_17: mov ax, bx shl eax, 16 nf9_18: mov ax, bx mov [edi+12], eax add edi, esi nf9_21: mov ax, bx shl eax, 16 nf9_22: mov ax, bx mov [edi], eax nf9_23: mov ax, bx shl eax, 16 nf9_24: mov ax, bx mov [edi+4], eax nf9_25: mov ax, bx shl eax, 16 nf9_26: mov ax, bx mov [edi+8], eax nf9_27: mov ax, bx shl eax, 16 nf9_28: mov ax, bx mov [edi+12], eax add edi, esi nf9_31: mov ax, bx shl eax, 16 nf9_32: mov ax, bx mov [edi], eax nf9_33: mov ax, bx shl eax, 16 nf9_34: mov ax, bx mov [edi+4], eax nf9_35: mov ax, bx shl eax, 16 nf9_36: mov ax, bx mov [edi+8], eax nf9_37: mov ax, bx shl eax, 16 nf9_38: mov ax, bx mov [edi+12], eax add edi, esi nf9_41: mov ax, bx shl eax, 16 nf9_42: mov ax, bx mov [edi], eax nf9_43: mov ax, bx shl eax, 16 nf9_44: mov ax, bx mov [edi+4], eax nf9_45: mov ax, bx shl eax, 16 nf9_46: mov ax, bx mov [edi+8], eax nf9_47: mov ax, bx shl eax, 16 nf9_48: mov ax, bx mov [edi+12], eax add edi, esi nf9_51: mov ax, bx shl eax, 16 nf9_52: mov ax, bx mov [edi], eax nf9_53: mov ax, bx shl eax, 16 nf9_54: mov ax, bx mov [edi+4], eax nf9_55: mov ax, bx shl eax, 16 nf9_56: mov ax, bx mov [edi+8], eax nf9_57: mov ax, bx shl eax, 16 nf9_58: mov ax, bx mov [edi+12], eax add edi, esi nf9_61: mov ax, bx shl eax, 16 nf9_62: mov ax, bx mov [edi], eax nf9_63: mov ax, bx shl eax, 16 nf9_64: mov ax, bx 
; Continuation of the nf9 unrolled store sequence.  At this point esi holds
; nf_width (loaded just before nf9_0) and edi is the destination pointer.
; The labelled `mov ax, bx` instructions are the ones the nf9 set-up code
; overwrites through edx before jumping to nf9_0.
        mov [edi+4], eax
nf9_65: mov ax, bx
        shl eax, 16
nf9_66: mov ax, bx
        mov [edi+8], eax
nf9_67: mov ax, bx
        shl eax, 16
nf9_68: mov ax, bx
        mov [edi+12], eax
        add edi, esi                    ; step to the next destination row

nf9_71: mov ax, bx
        shl eax, 16
nf9_72: mov ax, bx
        mov [edi], eax
nf9_73: mov ax, bx
        shl eax, 16
nf9_74: mov ax, bx
        mov [edi+4], eax
nf9_75: mov ax, bx
        shl eax, 16
nf9_76: mov ax, bx
        mov [edi+8], eax
nf9_77: mov ax, bx
        shl eax, 16
nf9_78: mov ax, bx
        mov [edi+12], eax
        add edi, esi

nf9_81: mov ax, bx
        shl eax, 16
nf9_82: mov ax, bx
        mov [edi], eax
nf9_83: mov ax, bx
        shl eax, 16
nf9_84: mov ax, bx
        mov [edi+4], eax
nf9_85: mov ax, bx
        shl eax, 16
nf9_86: mov ax, bx
        mov [edi+8], eax
nf9_87: mov ax, bx
        shl eax, 16
nf9_88: mov ax, bx
        mov [edi+12], eax

        pop esi                         ; restore the source pointer pushed by nf9
        pop ebp
        add esi, 24                     ; advance the source pointer by 24 bytes
        sub edi, [nfpk_back_right] ; br
        retn

;----------------------------------------
        ALIGN 4
;nf9+16
; nf25 -- handler for the "low 4x4x2" encoding (12 input bytes).
;   In:   esi = source data, edi = destination.
;   Set-up (below): each of the four bytes at esi+8..esi+11 selects a dword
;         in nfhpk_mov4; the four bytes of that dword are stored at offset +1
;         of the sixteen `mov eax, ebx` instructions nf25_11..nf25_44
;         (self-modifying code).  Presumably this rewrites the register-select
;         byte so that each store uses one of the four colour registers --
;         confirm against the contents of nfhpk_mov4.
;   Body: the patched code writes 8 rows of 16 bytes, two identical rows per
;         patched group, using nf_width as the row stride.
;   Out:  esi advanced by 12; edi advanced 7 rows, then reduced by
;         nfpk_back_right.
nf25: ; low 4x4x2 (12 bytes)
        xor eax, eax                    ; clear upper bits so al can be used as a table index
        lea ecx, [nfhpk_mov4]           ; ecx = patch-byte table
        lea edx, [nf25_11+1]            ; edx = address of the byte patched in nf25_11
        ; Removed byte ds: - AH
        mov al, [esi+8]                 ; selector byte for nf25_11..nf25_14
        mov ebx, [ecx+eax*4]
        mov [edx+(nf25_11-nf25_11)], bl
        mov [edx+(nf25_12-nf25_11)], bh
        shr ebx, 16
        mov [edx+(nf25_13-nf25_11)], bl
        mov [edx+(nf25_14-nf25_11)], bh
        mov al, [esi+9]                 ; selector byte for nf25_21..nf25_24
        mov ebx, [ecx+eax*4]
        mov [edx+(nf25_21-nf25_11)], bl
        mov [edx+(nf25_22-nf25_11)], bh
        shr ebx, 16
        mov [edx+(nf25_23-nf25_11)], bl
        mov [edx+(nf25_24-nf25_11)], bh
        mov al, [esi+10]                ; selector byte for nf25_31..nf25_34
        mov ebx, [ecx+eax*4]
        mov [edx+(nf25_31-nf25_11)], bl
        mov [edx+(nf25_32-nf25_11)], bh
        shr ebx, 16
        mov [edx+(nf25_33-nf25_11)], bl
        mov [edx+(nf25_34-nf25_11)], bh
        mov al, [esi+11]                ; selector byte for nf25_41..nf25_44
        mov ebx, [ecx+eax*4]
        mov [edx+(nf25_41-nf25_11)], bl
        mov [edx+(nf25_42-nf25_11)], bh
        shr ebx, 16
        mov [edx+(nf25_43-nf25_11)], bl
        mov [edx+(nf25_44-nf25_11)], bh

        push ebp                        ; ebp and esi are reused below; saved for the exit path
        push esi
        ; Load ebx,edx,ecx,ebp with four colors, duplicated in high order.
; nf25 (continued): load the four colours at esi, esi+2, esi+4, esi+6.
; Trans16 / Trans16_3 are macros defined elsewhere in this file; presumably
; they fetch and translate one 16-bit source pixel into the named register,
; with the extra argument of Trans16_3 handling the flag bit -- TODO confirm.
; `shrd r32, ecx, 16` followed by `mov r16, cx` leaves the 16-bit value in
; both halves of the 32-bit register.
        Trans16 cx, esi
        shrd ebx, ecx, 16
        mov bx, cx
        Trans16 cx, esi+2
        shrd edx, ecx, 16
        mov dx, cx
        Trans16_3 cx, esi+4, 1
        shrd eax, ecx, 16
        mov ax, cx
        push eax                        ; park the third colour; cx is needed for the fourth
        Trans16 cx, esi+6
        shrd ebp, ecx, 16
        mov bp, cx
        pop ecx                         ; ecx = third colour (duplicated)

        mov esi, [nf_width]             ; esi now serves as the row stride
        jmp nf25_0 ; flush prefetch

        ALIGN 4
nf25_0:
        ; Each labelled instruction below was patched by the nf25 set-up code.
        ; Every group stores the same dword to two consecutive rows.
nf25_11:mov eax, ebx
        mov [edi], eax
        mov [edi+esi], eax
nf25_12:mov eax, ebx
        mov [edi+4], eax
        mov [edi+esi+4], eax
nf25_13:mov eax, ebx
        mov [edi+8], eax
        mov [edi+esi+8], eax
nf25_14:mov eax, ebx
        mov [edi+12], eax
        mov [edi+esi+12], eax
        lea edi, [edi+esi*2]            ; advance two rows

nf25_21:mov eax, ebx
        mov [edi], eax
        mov [edi+esi], eax
nf25_22:mov eax, ebx
        mov [edi+4], eax
        mov [edi+esi+4], eax
nf25_23:mov eax, ebx
        mov [edi+8], eax
        mov [edi+esi+8], eax
nf25_24:mov eax, ebx
        mov [edi+12], eax
        mov [edi+esi+12], eax
        lea edi, [edi+esi*2]

nf25_31:mov eax, ebx
        mov [edi], eax
        mov [edi+esi], eax
nf25_32:mov eax, ebx
        mov [edi+4], eax
        mov [edi+esi+4], eax
nf25_33:mov eax, ebx
        mov [edi+8], eax
        mov [edi+esi+8], eax
nf25_34:mov eax, ebx
        mov [edi+12], eax
        mov [edi+esi+12], eax
        lea edi, [edi+esi*2]

nf25_41:mov eax, ebx
        mov [edi], eax
        mov [edi+esi], eax
nf25_42:mov eax, ebx
        mov [edi+4], eax
        mov [edi+esi+4], eax
nf25_43:mov eax, ebx
        mov [edi+8], eax
        mov [edi+esi+8], eax
nf25_44:mov eax, ebx
        mov [edi+12], eax
        mov [edi+esi+12], eax
        add edi, esi                    ; one more row: edi is now 7 rows below its entry value

        pop esi                         ; restore the source pointer
        pop ebp
        add esi, 12                     ; advance the source pointer by 12 bytes
        sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8
        retn

;----------------------------------------
        ALIGN 4
;nf9+32
; nf41 -- handler for the "low 4x8x2" encoding (16 input bytes).
;   In:   esi = source data, edi = destination.
;   If bit 15 of the word at esi+4 is set, control transfers to nf57 instead.
;   Set-up: each of the eight bytes at esi+8..esi+15 selects a dword in
;         nfhpk_mov8; the four bytes of that dword are stored at offset +1 of
;         the instructions nf41_x1..nf41_x4 of one row (self-modifying code).
;         The rows are nf41_1x..nf41_8x; edx is re-based to nf41_51 halfway
;         through.
nf41: ; low 4x8x2 (16 bytes)
        test word [esi+4], 08000h
        jnz near nf57

        xor eax, eax                    ; clear upper bits so al can be used as a table index
        lea ecx, [nfhpk_mov8]           ; ecx = patch-byte table
        lea edx, [nf41_11+1]            ; edx = address of the byte patched in nf41_11
        ; Removed byte ds: - AH
        mov al, [esi+8]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf41_11-nf41_11)], bl
        mov [edx+(nf41_12-nf41_11)], bh
        shr ebx, 16
        mov [edx+(nf41_13-nf41_11)], bl
        mov [edx+(nf41_14-nf41_11)], bh
        mov al, [esi+9]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf41_21-nf41_11)], bl
        mov [edx+(nf41_22-nf41_11)], bh
        shr ebx, 16
        mov [edx+(nf41_23-nf41_11)], bl
        mov [edx+(nf41_24-nf41_11)], bh
        mov al, [esi+10]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf41_31-nf41_11)], bl
        mov [edx+(nf41_32-nf41_11)], bh
        shr ebx, 16
        mov [edx+(nf41_33-nf41_11)], bl
        mov [edx+(nf41_34-nf41_11)], bh
        mov al, [esi+11]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf41_41-nf41_11)], bl
        mov [edx+(nf41_42-nf41_11)], bh
        shr ebx, 16
        mov [edx+(nf41_43-nf41_11)], bl
        mov [edx+(nf41_44-nf41_11)], bh

        lea edx, [edx+(nf41_51-nf41_11)] ; re-base edx on nf41_51 for the second half
        mov al, [esi+12]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf41_51-nf41_51)], bl
        mov [edx+(nf41_52-nf41_51)], bh
        shr ebx, 16
        mov [edx+(nf41_53-nf41_51)], bl
        mov [edx+(nf41_54-nf41_51)], bh
        mov al, [esi+13]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf41_61-nf41_51)], bl
        mov [edx+(nf41_62-nf41_51)], bh
        shr ebx, 16
        mov [edx+(nf41_63-nf41_51)], bl
        mov [edx+(nf41_64-nf41_51)], bh
        mov al, [esi+14]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf41_71-nf41_51)], bl
        mov [edx+(nf41_72-nf41_51)], bh
        shr ebx, 16
        mov [edx+(nf41_73-nf41_51)], bl
        mov [edx+(nf41_74-nf41_51)], bh
        mov al, [esi+15]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf41_81-nf41_51)], bl
        mov [edx+(nf41_82-nf41_51)], bh
        shr ebx, 16
        mov [edx+(nf41_83-nf41_51)], bl
        mov [edx+(nf41_84-nf41_51)], bh

        push ebp                        ; ebp and esi are reused below; saved for the exit path
        push esi
        ; Load ebx,edx,ecx,ebp with four colors, duplicated in high order.
; nf41 (continued): load the four colours at esi, esi+2, esi+4, esi+6.
; Trans16 / Trans16_3 are macros defined elsewhere in this file; presumably
; they fetch and translate one 16-bit source pixel into the named register,
; with the extra argument of Trans16_3 handling the flag bit -- TODO confirm.
; `shrd r32, ecx, 16` followed by `mov r16, cx` leaves the 16-bit value in
; both halves of the 32-bit register.
        Trans16_3 cx, esi, 1
        shrd ebx, ecx, 16
        mov bx, cx
        Trans16 cx, esi+2
        shrd edx, ecx, 16
        mov dx, cx
        Trans16 cx, esi+4
        shrd eax, ecx, 16
        mov ax, cx
        push eax                        ; park the third colour; cx is needed for the fourth
        Trans16 cx, esi+6
        shrd ebp, ecx, 16
        mov bp, cx
        pop ecx                         ; ecx = third colour (duplicated)

        mov esi, [nf_width]             ; esi now serves as the row stride
        jmp nf41_0 ; flush prefetch

        ALIGN 4
nf41_0:
        ; Every instruction below has the byte at offset +1 overwritten by the
        ; nf41 set-up code before it runs.  NOTE(review): as assembled these
        ; are stores through ebp, which holds a colour here; the patch
        ; presumably redirects them to edi-relative stores of a colour
        ; register -- confirm against nfhpk_mov8.
nf41_11:mov [ebp+0], ebx
nf41_12:mov [ebp+4], ebx
nf41_13:mov [ebp+8], ebx
nf41_14:mov [ebp+12], ebx
        add edi, esi                    ; next destination row
nf41_21:mov [ebp+0], ebx
nf41_22:mov [ebp+4], ebx
nf41_23:mov [ebp+8], ebx
nf41_24:mov [ebp+12], ebx
        add edi, esi
nf41_31:mov [ebp+0], ebx
nf41_32:mov [ebp+4], ebx
nf41_33:mov [ebp+8], ebx
nf41_34:mov [ebp+12], ebx
        add edi, esi
nf41_41:mov [ebp+0], ebx
nf41_42:mov [ebp+4], ebx
nf41_43:mov [ebp+8], ebx
nf41_44:mov [ebp+12], ebx
        add edi, esi
nf41_51:mov [ebp+0], ebx
nf41_52:mov [ebp+4], ebx
nf41_53:mov [ebp+8], ebx
nf41_54:mov [ebp+12], ebx
        add edi, esi
nf41_61:mov [ebp+0], ebx
nf41_62:mov [ebp+4], ebx
nf41_63:mov [ebp+8], ebx
nf41_64:mov [ebp+12], ebx
        add edi, esi
nf41_71:mov [ebp+0], ebx
nf41_72:mov [ebp+4], ebx
nf41_73:mov [ebp+8], ebx
nf41_74:mov [ebp+12], ebx
        add edi, esi
nf41_81:mov [ebp+0], ebx
nf41_82:mov [ebp+4], ebx
nf41_83:mov [ebp+8], ebx
nf41_84:mov [ebp+12], ebx

        pop esi                         ; restore the source pointer
        pop ebp
        add esi, 16                     ; advance the source pointer by 16 bytes
        sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8
        retn

;----------------------------------------
        ALIGN 4
;nf9+48
; nf57 -- handler for the "low 8x4x2" encoding (16 input bytes).
;   In:   esi = source data, edi = destination.
;   Set-up: each of the eight bytes at esi+8..esi+15 selects a dword in
;         nfhpk_mov4; the four bytes of that dword are stored at offset +2 of
;         four consecutive `mov ax, bx` instructions (self-modifying code).
;         Presumably this rewrites the register-select byte so each instruction
;         reads one of bx/dx/cx/bp -- confirm against nfhpk_mov4.
;         The bytes are applied in bh,bl order: within each pair the first
;         `mov ax` ends up in the upper half of eax after `shl eax, 16`.
;   Body: each patched pair builds one dword from two 16-bit values and stores
;         it to two consecutive rows; 4 row pairs of 16 bytes in total.
;   Out:  esi advanced by 16; edi advanced 7 rows, then reduced by
;         nfpk_back_right.
nf57: ; low 8x4x2 (16 bytes)
        xor eax, eax                    ; clear upper bits so al can be used as a table index
        lea ecx, [nfhpk_mov4]           ; ecx = patch-byte table
        lea edx, [nf57_11+2]            ; edx = address of the byte patched in nf57_11
        ; Removed byte ds: - AH
        mov al, [esi+8]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf57_11-nf57_11)], bh
        mov [edx+(nf57_12-nf57_11)], bl
        shr ebx, 16
        mov [edx+(nf57_13-nf57_11)], bh
        mov [edx+(nf57_14-nf57_11)], bl
        mov al, [esi+9]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf57_15-nf57_11)], bh
        mov [edx+(nf57_16-nf57_11)], bl
        shr ebx, 16
        mov [edx+(nf57_17-nf57_11)], bh
        mov [edx+(nf57_18-nf57_11)], bl
        mov al, [esi+10]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf57_21-nf57_11)], bh
        mov [edx+(nf57_22-nf57_11)], bl
        shr ebx, 16
        mov [edx+(nf57_23-nf57_11)], bh
        mov [edx+(nf57_24-nf57_11)], bl
        mov al, [esi+11]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf57_25-nf57_11)], bh
        mov [edx+(nf57_26-nf57_11)], bl
        shr ebx, 16
        mov [edx+(nf57_27-nf57_11)], bh
        mov [edx+(nf57_28-nf57_11)], bl
        mov al, [esi+12]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf57_31-nf57_11)], bh
        mov [edx+(nf57_32-nf57_11)], bl
        shr ebx, 16
        mov [edx+(nf57_33-nf57_11)], bh
        mov [edx+(nf57_34-nf57_11)], bl
        mov al, [esi+13]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf57_35-nf57_11)], bh
        mov [edx+(nf57_36-nf57_11)], bl
        shr ebx, 16
        mov [edx+(nf57_37-nf57_11)], bh
        mov [edx+(nf57_38-nf57_11)], bl
        mov al, [esi+14]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf57_41-nf57_11)], bh
        mov [edx+(nf57_42-nf57_11)], bl
        shr ebx, 16
        mov [edx+(nf57_43-nf57_11)], bh
        mov [edx+(nf57_44-nf57_11)], bl
        mov al, [esi+15]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf57_45-nf57_11)], bh
        mov [edx+(nf57_46-nf57_11)], bl
        shr ebx, 16
        mov [edx+(nf57_47-nf57_11)], bh
        mov [edx+(nf57_48-nf57_11)], bl

        push ebp                        ; ebp and esi are reused below; saved for the exit path
        push esi
        ; Load bx,dx,cx,bp with four colors
        Trans16_3 bx, esi, 1
        Trans16 dx, esi+2
        Trans16_3 cx, esi+4, 1
        Trans16 bp, esi+6

        mov esi, [nf_width]             ; esi now serves as the row stride
        jmp nf57_0 ; flush prefetch

        ALIGN 4
nf57_0:
nf57_11:mov ax, bx
        shl eax, 16
nf57_12:mov ax, bx
        mov [edi], eax
        mov [edi+esi], eax
nf57_13:mov ax, bx
        shl eax, 16
nf57_14:mov ax, bx
        mov [edi+4], eax
        mov [edi+esi+4], eax
nf57_15:mov ax, bx
        shl eax, 16
nf57_16:mov ax, bx
        mov [edi+8], eax
        mov [edi+esi+8], eax
nf57_17:mov ax, bx
        shl eax, 16
nf57_18:mov ax, bx
        mov [edi+12], eax
        mov [edi+esi+12], eax
        lea edi, [edi+esi*2]            ; advance two rows

nf57_21:mov ax, bx
        shl eax, 16
nf57_22:mov ax, bx
        mov [edi], eax
        mov [edi+esi], eax
nf57_23:mov ax, bx
        shl eax, 16
nf57_24:mov ax, bx
        mov [edi+4], eax
        mov [edi+esi+4], eax
nf57_25:mov ax, bx
        shl eax, 16
nf57_26:mov ax, bx
        mov [edi+8], eax
        mov [edi+esi+8], eax
nf57_27:mov ax, bx
        shl eax, 16
nf57_28:mov ax, bx
        mov [edi+12], eax
        mov [edi+esi+12], eax
        lea edi, [edi+esi*2]

nf57_31:mov ax, bx
        shl eax, 16
nf57_32:mov ax, bx
        mov [edi], eax
        mov [edi+esi], eax
nf57_33:mov ax, bx
        shl eax, 16
nf57_34:mov ax, bx
        mov [edi+4], eax
        mov [edi+esi+4], eax
nf57_35:mov ax, bx
        shl eax, 16
; nf57 (continued): remainder of the patched store sequence.  esi holds
; nf_width here; each dword is stored to two consecutive rows.
nf57_36:mov ax, bx
        mov [edi+8], eax
        mov [edi+esi+8], eax
nf57_37:mov ax, bx
        shl eax, 16
nf57_38:mov ax, bx
        mov [edi+12], eax
        mov [edi+esi+12], eax
        lea edi, [edi+esi*2]            ; advance two rows

nf57_41:mov ax, bx
        shl eax, 16
nf57_42:mov ax, bx
        mov [edi], eax
        mov [edi+esi], eax
nf57_43:mov ax, bx
        shl eax, 16
nf57_44:mov ax, bx
        mov [edi+4], eax
        mov [edi+esi+4], eax
nf57_45:mov ax, bx
        shl eax, 16
nf57_46:mov ax, bx
        mov [edi+8], eax
        mov [edi+esi+8], eax
nf57_47:mov ax, bx
        shl eax, 16
nf57_48:mov ax, bx
        mov [edi+12], eax
        mov [edi+esi+12], eax
        add edi, esi                    ; one more row: edi is now 7 rows below its entry value

        pop esi                         ; restore the source pointer
        pop ebp
        add esi, 16                     ; advance the source pointer by 16 bytes
        sub edi, [nfpk_back_right] ;br
        retn

;----------------------------------------
        ALIGN 4
; nf10 -- handler for the "2x2 4x4x2" encoding (48 input bytes).
;   In:   esi = source data, edi = destination.
;   If bit 15 of the word at esi is set, control transfers to nf26 instead.
;   Input layout as read below: four 12-byte groups, each with four colours
;   (offsets 0/12/24/36) followed by four selector bytes (offsets 8/20/32/44).
;   Set-up: every selector byte picks a dword in nfhpk_mov4 whose four bytes
;         are stored at offset +2 of four consecutive `mov ax, bx`
;         instructions (self-modifying code; presumably the register-select
;         byte -- confirm against nfhpk_mov4).  edx is re-based to nf10_51
;         halfway through.
;   Body: four quadrants of 4 rows x 8 bytes.  Groups 1 and 2 fill rows 0-3
;         and 4-7 of the left 8 bytes; edi is then moved back 8 rows and 8
;         bytes to the right for groups 3 and 4.
;   Out:  esi advanced by 48; edi reduced by 8 and by nfpk_back_right.
nf10: ; 2x2 4x4x2 (48 bytes)
        test word [esi], 08000h
        jnz near nf26

        xor eax, eax                    ; clear upper bits so al can be used as a table index
        lea ecx, [nfhpk_mov4]           ; ecx = patch-byte table
        lea edx, [nf10_11+2]            ; edx = address of the byte patched in nf10_11
        ; Remove byte ds: - AH
        mov al, [esi+8]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_11-nf10_11)], bh
        mov [edx+(nf10_12-nf10_11)], bl
        shr ebx, 16
        mov [edx+(nf10_13-nf10_11)], bh
        mov [edx+(nf10_14-nf10_11)], bl
        mov al, [esi+9]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_15-nf10_11)], bh
        mov [edx+(nf10_16-nf10_11)], bl
        shr ebx, 16
        mov [edx+(nf10_17-nf10_11)], bh
        mov [edx+(nf10_18-nf10_11)], bl
        mov al, [esi+10]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_21-nf10_11)], bh
        mov [edx+(nf10_22-nf10_11)], bl
        shr ebx, 16
        mov [edx+(nf10_23-nf10_11)], bh
        mov [edx+(nf10_24-nf10_11)], bl
        mov al, [esi+11]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_25-nf10_11)], bh
        mov [edx+(nf10_26-nf10_11)], bl
        shr ebx, 16
        mov [edx+(nf10_27-nf10_11)], bh
        mov [edx+(nf10_28-nf10_11)], bl

        mov al, [esi+20]                ; selectors of the second 12-byte group
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_31-nf10_11)], bh
        mov [edx+(nf10_32-nf10_11)], bl
        shr ebx, 16
        mov [edx+(nf10_33-nf10_11)], bh
        mov [edx+(nf10_34-nf10_11)], bl
        mov al, [esi+21]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_35-nf10_11)], bh
        mov [edx+(nf10_36-nf10_11)], bl
        shr ebx, 16
        mov [edx+(nf10_37-nf10_11)], bh
        mov [edx+(nf10_38-nf10_11)], bl
        mov al, [esi+22]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_41-nf10_11)], bh
        mov [edx+(nf10_42-nf10_11)], bl
        shr ebx, 16
        mov [edx+(nf10_43-nf10_11)], bh
        mov [edx+(nf10_44-nf10_11)], bl
        mov al, [esi+23]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_45-nf10_11)], bh
        mov [edx+(nf10_46-nf10_11)], bl
        shr ebx, 16
        mov [edx+(nf10_47-nf10_11)], bh
        mov [edx+(nf10_48-nf10_11)], bl

        lea edx, [edx+(nf10_51-nf10_11)] ; re-base edx on nf10_51 for the second half
        mov al, [esi+32]                ; selectors of the third 12-byte group
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_51-nf10_51)], bh
        mov [edx+(nf10_52-nf10_51)], bl
        shr ebx, 16
        mov [edx+(nf10_53-nf10_51)], bh
        mov [edx+(nf10_54-nf10_51)], bl
        mov al, [esi+33]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_55-nf10_51)], bh
        mov [edx+(nf10_56-nf10_51)], bl
        shr ebx, 16
        mov [edx+(nf10_57-nf10_51)], bh
        mov [edx+(nf10_58-nf10_51)], bl
        mov al, [esi+34]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_61-nf10_51)], bh
        mov [edx+(nf10_62-nf10_51)], bl
        shr ebx, 16
        mov [edx+(nf10_63-nf10_51)], bh
        mov [edx+(nf10_64-nf10_51)], bl
        mov al, [esi+35]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_65-nf10_51)], bh
        mov [edx+(nf10_66-nf10_51)], bl
        shr ebx, 16
        mov [edx+(nf10_67-nf10_51)], bh
        mov [edx+(nf10_68-nf10_51)], bl

        mov al, [esi+44]                ; selectors of the fourth 12-byte group
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_71-nf10_51)], bh
        mov [edx+(nf10_72-nf10_51)], bl
        shr ebx, 16
        mov [edx+(nf10_73-nf10_51)], bh
        mov [edx+(nf10_74-nf10_51)], bl
        mov al, [esi+45]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_75-nf10_51)], bh
        mov [edx+(nf10_76-nf10_51)], bl
        shr ebx, 16
        mov [edx+(nf10_77-nf10_51)], bh
        mov [edx+(nf10_78-nf10_51)], bl
        mov al, [esi+46]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_81-nf10_51)], bh
        mov [edx+(nf10_82-nf10_51)], bl
        shr ebx, 16
        mov [edx+(nf10_83-nf10_51)], bh
        mov [edx+(nf10_84-nf10_51)], bl
        mov al, [esi+47]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf10_85-nf10_51)], bh
        mov [edx+(nf10_86-nf10_51)], bl
        shr ebx, 16
        mov [edx+(nf10_87-nf10_51)], bh
        mov [edx+(nf10_88-nf10_51)], bl

        push ebp                        ; ebp is used as a colour register below
        push esi                        ; source pointer; reloaded from [esp] for each colour set
        ; Load bx,dx,cx,bp with four colors
        Trans16 bx, esi
        Trans16 dx, esi+2
        Trans16 cx, esi+4
        Trans16 bp, esi+6

        mov esi, [nf_width]             ; esi now serves as the row stride
        jmp nf10_0 ; flush prefetch

        ALIGN 4
nf10_0:
        ; First quadrant: 4 rows of 8 bytes.
nf10_11:mov ax, bx
        shl eax, 16
nf10_12:mov ax, bx
        mov [edi], eax
nf10_13:mov ax, bx
        shl eax, 16
nf10_14:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf10_15:mov ax, bx
        shl eax, 16
nf10_16:mov ax, bx
        mov [edi], eax
nf10_17:mov ax, bx
        shl eax, 16
nf10_18:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf10_21:mov ax, bx
        shl eax, 16
nf10_22:mov ax, bx
        mov [edi], eax
nf10_23:mov ax, bx
        shl eax, 16
nf10_24:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf10_25:mov ax, bx
        shl eax, 16
nf10_26:mov ax, bx
        mov [edi], eax
nf10_27:mov ax, bx
        shl eax, 16
nf10_28:mov ax, bx
        mov [edi+4], eax
        add edi, esi

        ; Second quadrant: the next 4 rows, same columns.
        ; Load bx,dx,cx,bp with four colors
        mov esi, [esp]                  ; saved source pointer
        Trans16 bx, esi+12
        Trans16 dx, esi+14
        Trans16 cx, esi+16
        Trans16 bp, esi+18
        mov esi, [nf_width]

nf10_31:mov ax, bx
        shl eax, 16
nf10_32:mov ax, bx
        mov [edi], eax
nf10_33:mov ax, bx
        shl eax, 16
nf10_34:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf10_35:mov ax, bx
        shl eax, 16
nf10_36:mov ax, bx
        mov [edi], eax
nf10_37:mov ax, bx
        shl eax, 16
nf10_38:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf10_41:mov ax, bx
        shl eax, 16
nf10_42:mov ax, bx
        mov [edi], eax
nf10_43:mov ax, bx
        shl eax, 16
nf10_44:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf10_45:mov ax, bx
        shl eax, 16
nf10_46:mov ax, bx
        mov [edi], eax
nf10_47:mov ax, bx
        shl eax, 16
nf10_48:mov ax, bx
        mov [edi+4], eax
        add edi, esi

        ; edi has advanced 8 rows; go back to the first row, 8 bytes to the right.
        lea eax, [esi*8-8]
        sub edi, eax

        ; Third quadrant.
        ; Load bx,dx,cx,bp with four colors
        mov esi, [esp]                  ; saved source pointer
        Trans16 bx, esi+24
        Trans16 dx, esi+26
        Trans16 cx, esi+28
        Trans16 bp, esi+30
        mov esi, [nf_width]

nf10_51:mov ax, bx
        shl eax, 16
nf10_52:mov ax, bx
        mov [edi], eax
nf10_53:mov ax, bx
        shl eax, 16
nf10_54:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf10_55:mov ax, bx
        shl eax, 16
nf10_56:mov ax, bx
        mov [edi], eax
nf10_57:mov ax, bx
        shl eax, 16
nf10_58:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf10_61:mov ax, bx
        shl eax, 16
nf10_62:mov ax, bx
        mov [edi], eax
nf10_63:mov ax, bx
        shl eax, 16
nf10_64:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf10_65:mov ax, bx
        shl eax, 16
nf10_66:mov ax, bx
        mov [edi], eax
nf10_67:mov ax, bx
        shl eax, 16
nf10_68:mov ax, bx
        mov [edi+4], eax
        add edi, esi

        ; Fourth quadrant.
        ; Load bx,dx,cx,bp with four colors
        mov esi, [esp]                  ; saved source pointer
        Trans16 bx, esi+36
; nf10 (continued): remaining three colours of the fourth colour set.  esi
; holds the saved source pointer here (reloaded from [esp] just above, where
; bx was loaded from esi+36).
        Trans16 dx, esi+38
        Trans16 cx, esi+40
        Trans16 bp, esi+42
        mov esi, [nf_width]             ; esi = row stride again

nf10_71:mov ax, bx
        shl eax, 16
nf10_72:mov ax, bx
        mov [edi], eax
nf10_73:mov ax, bx
        shl eax, 16
nf10_74:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf10_75:mov ax, bx
        shl eax, 16
nf10_76:mov ax, bx
        mov [edi], eax
nf10_77:mov ax, bx
        shl eax, 16
nf10_78:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf10_81:mov ax, bx
        shl eax, 16
nf10_82:mov ax, bx
        mov [edi], eax
nf10_83:mov ax, bx
        shl eax, 16
nf10_84:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf10_85:mov ax, bx
        shl eax, 16
nf10_86:mov ax, bx
        mov [edi], eax
nf10_87:mov ax, bx
        shl eax, 16
nf10_88:mov ax, bx
        mov [edi+4], eax

        pop esi                         ; restore the source pointer
        pop ebp
        add esi, 48                     ; advance the source pointer by 48 bytes
        sub edi, 8                      ; undo the 8-byte shift to the right-hand column
        sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8
        retn

;----------------------------------------
        ALIGN 4
;nf10+16
; nf26 -- handler for the "2x1 4x8x2" encoding (32 input bytes).
;   In:   esi = source data, edi = destination.
;   If bit 15 of the word at esi+16 is set, control transfers to nf42 instead.
;   Input layout as read below: two 16-byte groups, each with four colours
;   (offsets 0 and 16) followed by eight selector bytes (offsets 8 and 24).
;   Set-up: every selector byte picks a dword in nfhpk_mov4 whose four bytes
;         are stored at offset +2 of four consecutive `mov ax, bx`
;         instructions (self-modifying code; presumably the register-select
;         byte -- confirm against nfhpk_mov4).  edx is re-based to nf26_51
;         halfway through.
;   Body: the first group fills 8 rows of the left 8 bytes; edi is then moved
;         back 8 rows and 8 bytes to the right for the second group.
;   Out:  esi advanced by 32; edi reduced by 8 and by nfpk_back_right.
nf26: ; 2x1 4x8x2 (32 bytes)
        test word [esi+16], 08000h
        jnz near nf42

        xor eax, eax                    ; clear upper bits so al can be used as a table index
        lea ecx, [nfhpk_mov4]           ; ecx = patch-byte table
        lea edx, [nf26_11+2]            ; edx = address of the byte patched in nf26_11
        ; Removed byte ds: - AH
        mov al, [esi+8]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_11-nf26_11)], bh
        mov [edx+(nf26_12-nf26_11)], bl
        shr ebx, 16
        mov [edx+(nf26_13-nf26_11)], bh
        mov [edx+(nf26_14-nf26_11)], bl
        mov al, [esi+9]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_15-nf26_11)], bh
        mov [edx+(nf26_16-nf26_11)], bl
        shr ebx, 16
        mov [edx+(nf26_17-nf26_11)], bh
        mov [edx+(nf26_18-nf26_11)], bl
        mov al, [esi+10]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_21-nf26_11)], bh
        mov [edx+(nf26_22-nf26_11)], bl
        shr ebx, 16
        mov [edx+(nf26_23-nf26_11)], bh
        mov [edx+(nf26_24-nf26_11)], bl
        mov al, [esi+11]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_25-nf26_11)], bh
        mov [edx+(nf26_26-nf26_11)], bl
        shr ebx, 16
        mov [edx+(nf26_27-nf26_11)], bh
        mov [edx+(nf26_28-nf26_11)], bl
        mov al, [esi+12]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_31-nf26_11)], bh
        mov [edx+(nf26_32-nf26_11)], bl
        shr ebx, 16
        mov [edx+(nf26_33-nf26_11)], bh
        mov [edx+(nf26_34-nf26_11)], bl
        mov al, [esi+13]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_35-nf26_11)], bh
        mov [edx+(nf26_36-nf26_11)], bl
        shr ebx, 16
        mov [edx+(nf26_37-nf26_11)], bh
        mov [edx+(nf26_38-nf26_11)], bl
        mov al, [esi+14]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_41-nf26_11)], bh
        mov [edx+(nf26_42-nf26_11)], bl
        shr ebx, 16
        mov [edx+(nf26_43-nf26_11)], bh
        mov [edx+(nf26_44-nf26_11)], bl
        mov al, [esi+15]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_45-nf26_11)], bh
        mov [edx+(nf26_46-nf26_11)], bl
        shr ebx, 16
        mov [edx+(nf26_47-nf26_11)], bh
        mov [edx+(nf26_48-nf26_11)], bl

        lea edx, [edx+(nf26_51-nf26_11)] ; re-base edx on nf26_51 for the second half
        mov al, [esi+24]                ; selectors of the second 16-byte group
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_51-nf26_51)], bh
        mov [edx+(nf26_52-nf26_51)], bl
        shr ebx, 16
        mov [edx+(nf26_53-nf26_51)], bh
        mov [edx+(nf26_54-nf26_51)], bl
        mov al, [esi+25]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_55-nf26_51)], bh
        mov [edx+(nf26_56-nf26_51)], bl
        shr ebx, 16
        mov [edx+(nf26_57-nf26_51)], bh
        mov [edx+(nf26_58-nf26_51)], bl
        mov al, [esi+26]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_61-nf26_51)], bh
        mov [edx+(nf26_62-nf26_51)], bl
        shr ebx, 16
        mov [edx+(nf26_63-nf26_51)], bh
        mov [edx+(nf26_64-nf26_51)], bl
        mov al, [esi+27]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_65-nf26_51)], bh
        mov [edx+(nf26_66-nf26_51)], bl
        shr ebx, 16
        mov [edx+(nf26_67-nf26_51)], bh
        mov [edx+(nf26_68-nf26_51)], bl
        mov al, [esi+28]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_71-nf26_51)], bh
        mov [edx+(nf26_72-nf26_51)], bl
        shr ebx, 16
        mov [edx+(nf26_73-nf26_51)], bh
        mov [edx+(nf26_74-nf26_51)], bl
        mov al, [esi+29]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_75-nf26_51)], bh
        mov [edx+(nf26_76-nf26_51)], bl
        shr ebx, 16
        mov [edx+(nf26_77-nf26_51)], bh
        mov [edx+(nf26_78-nf26_51)], bl
        mov al, [esi+30]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_81-nf26_51)], bh
        mov [edx+(nf26_82-nf26_51)], bl
        shr ebx, 16
        mov [edx+(nf26_83-nf26_51)], bh
        mov [edx+(nf26_84-nf26_51)], bl
        mov al, [esi+31]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf26_85-nf26_51)], bh
        mov [edx+(nf26_86-nf26_51)], bl
        shr ebx, 16
        mov [edx+(nf26_87-nf26_51)], bh
        mov [edx+(nf26_88-nf26_51)], bl

        push ebp                        ; ebp is used as a colour register below
        push esi                        ; source pointer; reloaded from [esp] for the second colour set
        ; Load bx,dx,cx,bp with four colors
        Trans16_3 bx, esi, 1
        Trans16 dx, esi+2
        Trans16 cx, esi+4
        Trans16 bp, esi+6

        mov esi, [nf_width]             ; esi now serves as the row stride
        jmp nf26_0 ; flush prefetch

        ALIGN 4
nf26_0:
        ; Left half: 8 rows of 8 bytes.
nf26_11:mov ax, bx
        shl eax, 16
nf26_12:mov ax, bx
        mov [edi], eax
nf26_13:mov ax, bx
        shl eax, 16
nf26_14:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_15:mov ax, bx
        shl eax, 16
nf26_16:mov ax, bx
        mov [edi], eax
nf26_17:mov ax, bx
        shl eax, 16
nf26_18:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_21:mov ax, bx
        shl eax, 16
nf26_22:mov ax, bx
        mov [edi], eax
nf26_23:mov ax, bx
        shl eax, 16
nf26_24:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_25:mov ax, bx
        shl eax, 16
nf26_26:mov ax, bx
        mov [edi], eax
nf26_27:mov ax, bx
        shl eax, 16
nf26_28:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_31:mov ax, bx
        shl eax, 16
nf26_32:mov ax, bx
        mov [edi], eax
nf26_33:mov ax, bx
        shl eax, 16
nf26_34:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_35:mov ax, bx
        shl eax, 16
nf26_36:mov ax, bx
        mov [edi], eax
nf26_37:mov ax, bx
        shl eax, 16
nf26_38:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_41:mov ax, bx
        shl eax, 16
nf26_42:mov ax, bx
        mov [edi], eax
nf26_43:mov ax, bx
        shl eax, 16
nf26_44:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_45:mov ax, bx
        shl eax, 16
nf26_46:mov ax, bx
        mov [edi], eax
nf26_47:mov ax, bx
        shl eax, 16
nf26_48:mov ax, bx
        mov [edi+4], eax
        add edi, esi

        ; edi has advanced 8 rows; go back to the first row, 8 bytes to the right.
        lea eax, [esi*8-8]
        sub edi, eax

        ; Right half.
        ; Load bx,dx,cx,bp with four colors
        mov esi, [esp]                  ; saved source pointer
        Trans16 bx, esi+16
        Trans16 dx, esi+18
        Trans16 cx, esi+20
        Trans16 bp, esi+22
        mov esi, [nf_width]

nf26_51:mov ax, bx
        shl eax, 16
nf26_52:mov ax, bx
        mov [edi], eax
nf26_53:mov ax, bx
        shl eax, 16
nf26_54:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_55:mov ax, bx
        shl eax, 16
nf26_56:mov ax, bx
        mov [edi], eax
nf26_57:mov ax, bx
        shl eax, 16
nf26_58:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_61:mov ax, bx
        shl eax, 16
nf26_62:mov ax, bx
        mov [edi], eax
nf26_63:mov ax, bx
        shl eax, 16
nf26_64:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_65:mov ax, bx
        shl eax, 16
nf26_66:mov ax, bx
        mov [edi], eax
nf26_67:mov ax, bx
        shl eax, 16
nf26_68:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_71:mov ax, bx
        shl eax, 16
; nf26 (continued): last rows of the right-hand half.  esi holds nf_width.
nf26_72:mov ax, bx
        mov [edi], eax
nf26_73:mov ax, bx
        shl eax, 16
nf26_74:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_75:mov ax, bx
        shl eax, 16
nf26_76:mov ax, bx
        mov [edi], eax
nf26_77:mov ax, bx
        shl eax, 16
nf26_78:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_81:mov ax, bx
        shl eax, 16
nf26_82:mov ax, bx
        mov [edi], eax
nf26_83:mov ax, bx
        shl eax, 16
nf26_84:mov ax, bx
        mov [edi+4], eax
        add edi, esi
nf26_85:mov ax, bx
        shl eax, 16
nf26_86:mov ax, bx
        mov [edi], eax
nf26_87:mov ax, bx
        shl eax, 16
nf26_88:mov ax, bx
        mov [edi+4], eax

        pop esi                         ; restore the source pointer
        pop ebp
        add esi, 32                     ; advance the source pointer by 32 bytes
        sub edi, 8                      ; undo the 8-byte shift to the right-hand column
        sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8
        retn

;----------------------------------------
        ALIGN 4
;nf10+32
; nf42 -- handler for the "1x2 8x4x2" encoding (32 input bytes).
;   In:   esi = source data, edi = destination.
;   Input layout as read below: two 16-byte groups, each with four colours
;   (offsets 0 and 16) followed by eight selector bytes (offsets 8 and 24).
;   Set-up: every selector byte picks a dword in nfhpk_mov4 whose four bytes
;         are stored at offset +2 of four consecutive `mov ax, bx`
;         instructions (self-modifying code; presumably the register-select
;         byte -- confirm against nfhpk_mov4).  edx is re-based to nf42_51
;         halfway through.
;   Body: the first group fills rows 0-3 (16 bytes each), the second group
;         rows 4-7.
;   Out:  esi advanced by 32; edi advanced 7 rows, then reduced by
;         nfpk_back_right.
nf42: ; 1x2 8x4x2 (32 bytes)
        xor eax, eax                    ; clear upper bits so al can be used as a table index
        lea ecx, [nfhpk_mov4]           ; ecx = patch-byte table
        lea edx, [nf42_11+2]            ; edx = address of the byte patched in nf42_11
        ; removed byte ds: - AH
        mov al, [esi+8]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_11-nf42_11)], bh
        mov [edx+(nf42_12-nf42_11)], bl
        shr ebx, 16
        mov [edx+(nf42_13-nf42_11)], bh
        mov [edx+(nf42_14-nf42_11)], bl
        mov al, [esi+9]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_15-nf42_11)], bh
        mov [edx+(nf42_16-nf42_11)], bl
        shr ebx, 16
        mov [edx+(nf42_17-nf42_11)], bh
        mov [edx+(nf42_18-nf42_11)], bl
        mov al, [esi+10]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_21-nf42_11)], bh
        mov [edx+(nf42_22-nf42_11)], bl
        shr ebx, 16
        mov [edx+(nf42_23-nf42_11)], bh
        mov [edx+(nf42_24-nf42_11)], bl
        mov al, [esi+11]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_25-nf42_11)], bh
        mov [edx+(nf42_26-nf42_11)], bl
        shr ebx, 16
        mov [edx+(nf42_27-nf42_11)], bh
        mov [edx+(nf42_28-nf42_11)], bl
        mov al, [esi+12]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_31-nf42_11)], bh
        mov [edx+(nf42_32-nf42_11)], bl
        shr ebx, 16
        mov [edx+(nf42_33-nf42_11)], bh
        mov [edx+(nf42_34-nf42_11)], bl
        mov al, [esi+13]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_35-nf42_11)], bh
        mov [edx+(nf42_36-nf42_11)], bl
        shr ebx, 16
        mov [edx+(nf42_37-nf42_11)], bh
        mov [edx+(nf42_38-nf42_11)], bl
        mov al, [esi+14]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_41-nf42_11)], bh
        mov [edx+(nf42_42-nf42_11)], bl
        shr ebx, 16
        mov [edx+(nf42_43-nf42_11)], bh
        mov [edx+(nf42_44-nf42_11)], bl
        mov al, [esi+15]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_45-nf42_11)], bh
        mov [edx+(nf42_46-nf42_11)], bl
        shr ebx, 16
        mov [edx+(nf42_47-nf42_11)], bh
        mov [edx+(nf42_48-nf42_11)], bl

        lea edx, [edx+(nf42_51-nf42_11)] ; re-base edx on nf42_51 for the second half
        mov al, [esi+24]                ; selectors of the second 16-byte group
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_51-nf42_51)], bh
        mov [edx+(nf42_52-nf42_51)], bl
        shr ebx, 16
        mov [edx+(nf42_53-nf42_51)], bh
        mov [edx+(nf42_54-nf42_51)], bl
        mov al, [esi+25]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_55-nf42_51)], bh
        mov [edx+(nf42_56-nf42_51)], bl
        shr ebx, 16
        mov [edx+(nf42_57-nf42_51)], bh
        mov [edx+(nf42_58-nf42_51)], bl
        mov al, [esi+26]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_61-nf42_51)], bh
        mov [edx+(nf42_62-nf42_51)], bl
        shr ebx, 16
        mov [edx+(nf42_63-nf42_51)], bh
        mov [edx+(nf42_64-nf42_51)], bl
        mov al, [esi+27]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_65-nf42_51)], bh
        mov [edx+(nf42_66-nf42_51)], bl
        shr ebx, 16
        mov [edx+(nf42_67-nf42_51)], bh
        mov [edx+(nf42_68-nf42_51)], bl
        mov al, [esi+28]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_71-nf42_51)], bh
        mov [edx+(nf42_72-nf42_51)], bl
        shr ebx, 16
        mov [edx+(nf42_73-nf42_51)], bh
        mov [edx+(nf42_74-nf42_51)], bl
        mov al, [esi+29]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_75-nf42_51)], bh
        mov [edx+(nf42_76-nf42_51)], bl
        shr ebx, 16
        mov [edx+(nf42_77-nf42_51)], bh
        mov [edx+(nf42_78-nf42_51)], bl
        mov al, [esi+30]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_81-nf42_51)], bh
        mov [edx+(nf42_82-nf42_51)], bl
        shr ebx, 16
        mov [edx+(nf42_83-nf42_51)], bh
        mov [edx+(nf42_84-nf42_51)], bl
        mov al, [esi+31]
        mov ebx, [ecx+eax*4]
        mov [edx+(nf42_85-nf42_51)], bh
        mov [edx+(nf42_86-nf42_51)], bl
        shr ebx, 16
        mov [edx+(nf42_87-nf42_51)], bh
        mov [edx+(nf42_88-nf42_51)], bl

        push ebp                        ; ebp is used as a colour register below
        push esi                        ; source pointer; reloaded from [esp] for the second colour set
        ; Load bx,dx,cx,bp with four colors
        Trans16_3 bx, esi, 1
        Trans16 dx, esi+2
        Trans16 cx, esi+4
        Trans16 bp, esi+6

        mov esi, [nf_width]             ; esi now serves as the row stride
        jmp nf42_0 ; flush prefetch

        ALIGN 4
nf42_0:
        ; Top half: 4 rows of 16 bytes.
nf42_11:mov ax, bx
        shl eax, 16
nf42_12:mov ax, bx
        mov [edi], eax
nf42_13:mov ax, bx
        shl eax, 16
nf42_14:mov ax, bx
        mov [edi+4], eax
nf42_15:mov ax, bx
        shl eax, 16
nf42_16:mov ax, bx
        mov [edi+8], eax
nf42_17:mov ax, bx
        shl eax, 16
nf42_18:mov ax, bx
        mov [edi+12], eax
        add edi, esi

nf42_21:mov ax, bx
        shl eax, 16
nf42_22:mov ax, bx
        mov [edi], eax
nf42_23:mov ax, bx
        shl eax, 16
nf42_24:mov ax, bx
        mov [edi+4], eax
nf42_25:mov ax, bx
        shl eax, 16
nf42_26:mov ax, bx
        mov [edi+8], eax
nf42_27:mov ax, bx
        shl eax, 16
nf42_28:mov ax, bx
        mov [edi+12], eax
        add edi, esi

nf42_31:mov ax, bx
        shl eax, 16
nf42_32:mov ax, bx
        mov [edi], eax
nf42_33:mov ax, bx
        shl eax, 16
nf42_34:mov ax, bx
        mov [edi+4], eax
nf42_35:mov ax, bx
        shl eax, 16
nf42_36:mov ax, bx
        mov [edi+8], eax
nf42_37:mov ax, bx
        shl eax, 16
nf42_38:mov ax, bx
        mov [edi+12], eax
        add edi, esi

nf42_41:mov ax, bx
        shl eax, 16
nf42_42:mov ax, bx
        mov [edi], eax
nf42_43:mov ax, bx
        shl eax, 16
nf42_44:mov ax, bx
        mov [edi+4], eax
nf42_45:mov ax, bx
        shl eax, 16
nf42_46:mov ax, bx
        mov [edi+8], eax
nf42_47:mov ax, bx
        shl eax, 16
nf42_48:mov ax, bx
        mov [edi+12], eax
        add edi, esi

        ; Bottom half.
        ; Load bx,dx,cx,bp with four colors
        mov esi, [esp]                  ; saved source pointer
        Trans16_3 bx, esi+16, 1
        Trans16 dx, esi+18
        Trans16 cx, esi+20
        Trans16 bp, esi+22
        mov esi, [nf_width]

nf42_51:mov ax, bx
        shl eax, 16
nf42_52:mov ax, bx
        mov [edi], eax
nf42_53:mov ax, bx
        shl eax, 16
nf42_54:mov ax, bx
        mov [edi+4], eax
nf42_55:mov ax, bx
        shl eax, 16
nf42_56:mov ax, bx
        mov [edi+8], eax
nf42_57:mov ax, bx
        shl eax, 16
nf42_58:mov ax, bx
        mov [edi+12], eax
        add edi, esi

nf42_61:mov ax, bx
        shl eax, 16
nf42_62:mov ax, bx
        mov [edi], eax
nf42_63:mov ax, bx
        shl eax, 16
nf42_64:mov ax, bx
        mov [edi+4], eax
nf42_65:mov ax, bx
        shl eax, 16
nf42_66:mov ax, bx
        mov [edi+8], eax
nf42_67:mov ax, bx
        shl eax, 16
nf42_68:mov ax, bx
        mov [edi+12], eax
        add edi, esi

nf42_71:mov ax, bx
        shl eax, 16
nf42_72:mov ax, bx
        mov [edi], eax
nf42_73:mov ax, bx
        shl eax, 16
nf42_74:mov ax, bx
        mov [edi+4], eax
nf42_75:mov ax, bx
        shl eax, 16
nf42_76:mov ax, bx
        mov [edi+8], eax
nf42_77:mov ax, bx
        shl eax, 16
nf42_78:mov ax, bx
        mov [edi+12], eax
        add edi, esi
; nf42 (continued): last row of the bottom half, then the exit sequence.
nf42_81:mov ax, bx
        shl eax, 16
nf42_82:mov ax, bx
        mov [edi], eax
nf42_83:mov ax, bx
        shl eax, 16
nf42_84:mov ax, bx
        mov [edi+4], eax
nf42_85:mov ax, bx
        shl eax, 16
nf42_86:mov ax, bx
        mov [edi+8], eax
nf42_87:mov ax, bx
        shl eax, 16
nf42_88:mov ax, bx
        mov [edi+12], eax

        pop esi                         ; restore the source pointer
        pop ebp
        add esi, 32                     ; advance the source pointer by 32 bytes
        sub edi, [nfpk_back_right] ;br
        retn

;----------------------------------------
        ALIGN 4
; nf11 -- handler for the "8x8x16" encoding (128 input bytes).
;   In:   esi = source data, edi = destination.
;   Every one of the 64 source words is passed through Trans16 (macro defined
;   elsewhere; presumably a 16-bit pixel translation -- TODO confirm) and
;   stored to the destination, 8 words per row, rows nf_width bytes apart.
;   Out:  esi advanced by 128; edi advanced 7 rows, then reduced by
;         nfpk_back_right.
nf11: ; 8x8x16 (128 bytes)
        mov edx, [nf_width]             ; edx = row stride

; Trans16Blk idx: translate the 8 words at idx..idx+14 and store them at
; [edi]..[edi+14].  Uses bx as the scratch destination of Trans16.
%macro Trans16Blk 1 ; MACRO idx
        Trans16 bx, %1 ;idx
        mov [edi], bx
        Trans16 bx, (%1 + 2) ;idx+2
        mov [edi+2], bx
        Trans16 bx, (%1 + 4) ;idx+4
        mov [edi+4], bx
        Trans16 bx, (%1 + 6) ;idx+6
        mov [edi+6], bx
        Trans16 bx, (%1 + 8) ;idx+8
        mov [edi+8], bx
        Trans16 bx, (%1 + 10) ;idx+10
        mov [edi+10], bx
        Trans16 bx, (%1 + 12) ;idx+12
        mov [edi+12], bx
        Trans16 bx, (%1 + 14) ;idx+14
        mov [edi+14], bx
%endmacro

        Trans16Blk esi ;0
        add edi, edx
        Trans16Blk esi+16 ;1
        add edi, edx
        Trans16Blk esi+32 ;2
        add edi, edx
        Trans16Blk esi+48 ;3
        add edi, edx
        Trans16Blk esi+64 ;4
        add edi, edx
        Trans16Blk esi+80 ;5
        add edi, edx
        Trans16Blk esi+96 ;6
        add edi, edx
        Trans16Blk esi+112 ;7

        add esi, 128                    ; advance the source pointer by 128 bytes
        sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8
        retn

;----------------------------------------
        ALIGN 4
; nf12 -- handler for the "low 4x4x16" encoding (32 input bytes).
;   In:   esi = source data, edi = destination.
;   Each of the 16 source words is translated with Trans16, duplicated into
;   both halves of eax (`shrd eax, ebx, 16` then `mov ax, bx`) and stored to
;   the same column of two consecutive rows, i.e. one source word fills a
;   2x2 group of 16-bit destination words.
;   Out:  esi advanced by 32; edi advanced 7 rows, then reduced by
;         nfpk_back_right.
nf12: ; low 4x4x16 (32 bytes)
        mov edx, [nf_width]             ; edx = row stride

        Trans16 bx, esi
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi], eax
        mov [edi+edx], eax
        Trans16 bx, esi+2
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi+4], eax
        mov [edi+edx+4], eax
        Trans16 bx, esi+4
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi+8], eax
        mov [edi+edx+8], eax
        Trans16 bx, esi+6
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi+12], eax
        mov [edi+edx+12], eax
        lea edi, [edi+edx*2]            ; advance two rows

        Trans16 bx, esi+8
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi], eax
        mov [edi+edx], eax
        Trans16 bx, esi+10
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi+4], eax
        mov [edi+edx+4], eax
        Trans16 bx, esi+12
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi+8], eax
        mov [edi+edx+8], eax
        Trans16 bx, esi+14
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi+12], eax
        mov [edi+edx+12], eax
        lea edi, [edi+edx*2]

        Trans16 bx, esi+16
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi], eax
        mov [edi+edx], eax
        Trans16 bx, esi+18
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi+4], eax
        mov [edi+edx+4], eax
        Trans16 bx, esi+20
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi+8], eax
        mov [edi+edx+8], eax
        Trans16 bx, esi+22
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi+12], eax
        mov [edi+edx+12], eax
        lea edi, [edi+edx*2]

        Trans16 bx, esi+24
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi], eax
        mov [edi+edx], eax
        Trans16 bx, esi+26
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi+4], eax
        mov [edi+edx+4], eax
        Trans16 bx, esi+28
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi+8], eax
        mov [edi+edx+8], eax
        Trans16 bx, esi+30
        shrd eax, ebx, 16
        mov ax, bx
        mov [edi+12], eax
        mov [edi+edx+12], eax
        add edi, edx                    ; one more row: edi is now 7 rows below its entry value

        sub edi, [nfpk_back_right] ;br
        add esi, 32                     ; advance the source pointer by 32 bytes
        retn

;----------------------------------------
        ALIGN 4
; nf13 -- handler for the "2x2 4x4x0" encoding (8 input bytes).
;   In:   esi = source data, edi = destination.
;   Four source words, one per quadrant: esi and esi+2 fill the left and
;   right 8 bytes of rows 0-3, esi+4 and esi+6 those of rows 4-7.  Each
;   colour is duplicated into both halves of a 32-bit register first.
;   Out:  esi advanced by 8; edi advanced 7 rows, then reduced by
;         nfpk_back_right.
nf13: ; 2x2 4x4x0 (8 bytes)
        mov edx, [nf_width]             ; edx = row stride

        Trans16 cx, esi
        shrd ebx, ecx, 16
        mov bx, cx                      ; ebx = left colour, duplicated
        Trans16 cx, esi+2
        shrd eax, ecx, 16
        mov ax, cx
        mov ecx, eax                    ; ecx = right colour, duplicated

        mov [edi], ebx
        mov [edi+4], ebx
        mov [edi+8], ecx
        mov [edi+12], ecx
        mov [edi+edx], ebx
        mov [edi+edx+4], ebx
        mov [edi+edx+8], ecx
        mov [edi+edx+12], ecx
        lea edi, [edi+edx*2]            ; advance two rows
        mov [edi], ebx
        mov [edi+4], ebx
        mov [edi+8], ecx
        mov [edi+12], ecx
        mov [edi+edx], ebx
        mov [edi+edx+4], ebx
        mov [edi+edx+8], ecx
        mov [edi+edx+12], ecx
        lea edi, [edi+edx*2]

        Trans16 cx, esi+4
        shrd ebx, ecx, 16
        mov bx, cx                      ; ebx = left colour of the lower half
        Trans16 cx, esi+6
        shrd eax, ecx, 16
        mov ax, cx
        mov ecx, eax                    ; ecx = right colour of the lower half

        mov [edi], ebx
        mov [edi+4], ebx
        mov [edi+8], ecx
        mov [edi+12], ecx
        mov [edi+edx], ebx
        mov [edi+edx+4], ebx
        mov [edi+edx+8], ecx
        mov [edi+edx+12], ecx
        lea edi, [edi+edx*2]
        mov [edi], ebx
        mov [edi+4], ebx
        mov [edi+8], ecx
        mov [edi+12], ecx
        mov [edi+edx], ebx
        mov [edi+edx+4], ebx
        mov [edi+edx+8], ecx
        mov [edi+edx+12], ecx
        add edi, edx                    ; one more row: edi is now 7 rows below its entry value

        sub edi, [nfpk_back_right] ; br
        add esi, 8                      ; advance the source pointer by 8 bytes
        retn

;----------------------------------------
        ALIGN 4
; nf14 -- handler for the "8x8x0" encoding (2 input bytes): one colour for
; the whole block.  Translates the word at esi, advances esi by 2, duplicates
; the colour into both halves of ebx and falls through into nf_solid.
nf14: ; 8x8x0 (2 bytes)
        Trans16 cx, esi
        add esi, 2
        shrd ebx, ecx, 16
        mov bx, cx

; nf_solid -- fill 8 rows of 16 bytes at edi with the dword in ebx.
;   In:   ebx = fill value, edi = destination.
;   Out:  edx = nf_width; edi advanced 7 rows, then reduced by
;         nfpk_back_right.  esi is not touched here.
nf_solid:
        mov edx, [nf_width]             ; edx = row stride

        mov [edi], ebx
        mov [edi+4], ebx
        mov [edi+8], ebx
        mov [edi+12], ebx
        add edi, edx
        mov [edi], ebx
        mov [edi+4], ebx
        mov [edi+8], ebx
        mov [edi+12], ebx
        add edi, edx
        mov [edi], ebx
        mov [edi+4], ebx
        mov [edi+8], ebx
        mov [edi+12], ebx
        add edi, edx
        mov [edi], ebx
        mov [edi+4], ebx
        mov [edi+8], ebx
        mov [edi+12], ebx
        add edi, edx
        mov [edi], ebx
        mov [edi+4], ebx
        mov [edi+8], ebx
        mov [edi+12], ebx
        add edi, edx
        mov [edi], ebx
        mov [edi+4], ebx
        mov [edi+8], ebx
        mov [edi+12], ebx
        add edi, edx
        mov [edi], ebx
        mov [edi+4], ebx
        mov [edi+8], ebx
        mov [edi+12], ebx
        add edi, edx
        mov [edi], ebx
        mov [edi+4], ebx
        mov [edi+8], ebx
        mov [edi+12], ebx

        sub edi, [nfpk_back_right] ;br ; (SHEIGHT-1)*width+8
        retn

;----------------------------------------
        ALIGN 4
; nf15 -- unused encoding: returns immediately without touching esi or edi.
nf15: ; unused
        retn
;nfHPkDecomp ENDP