diff --git a/Makefile b/Makefile
index 8f3d6f4..53c031f 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,11 @@ LD_FLAGS := --strip-all
 DEBUG_LD_FLAGS := -g
 DEBUG_NASM_FLAGS := -g -F dwarf
 
+# check for avx2 support
+ifeq ($(shell grep -o 'avx2[^ ]*' /proc/cpuinfo | head -n 1),avx2)
+    NASM_FLAGS += -DAVX2
+endif
+
 SRC_PATH := src
 OBJ_PATH := build/obj
 
diff --git a/src/drawing.asm b/src/drawing.asm
index 4e1cd28..5d98496 100644
--- a/src/drawing.asm
+++ b/src/drawing.asm
@@ -6,13 +6,16 @@ section .bss
 	extern term_rows
 	extern term_cols
 
-section .data
+	simulation_running: RESB 1
+
+section .rodata
 	clear: db ESC_CHAR, "[2J", 0
 	reset: db ESC_CHAR, "[0m", 0
 
-    statusbar: db ESC_CHAR, "[100m", "Use arrow keys to move cursor, enter to invert cell, p to simulation", 0
-
-
+	statusbar: db ESC_CHAR, "[100m", "Use arrow keys to move cursor, enter to invert cell, p to simulation", 0
+	
+	start_str: db "START", 0
+	stop_str: db "STOP", 0
 section .text
 
 	extern print_str
@@ -49,6 +52,7 @@ print_game_ui:
 
 	call print_str
 
+
 	ret
 
 
diff --git a/src/main.asm b/src/main.asm
index 6d7ea80..82955a5 100644
--- a/src/main.asm
+++ b/src/main.asm
@@ -2,16 +2,18 @@ section .bss
 
 	multipurpuse_buf: RESB 8
 
-	
+
+	global term_rows
 	term_rows: RESW 1
+	global term_cols
 	term_cols: RESW 1
-	
+
+	global gameboard_ptr
 	gameboard_ptr: RESQ 1
 
 	extern cursor_rows
 	extern cursor_cols
 
-section .data
 section .text
 
 	extern print_str
diff --git a/src/str.asm b/src/str.asm
index a36da8d..eefb936 100644
--- a/src/str.asm
+++ b/src/str.asm
@@ -60,15 +60,69 @@ unsigned_int_to_ascii: ; takes pointer to array in rdi and value stored in rsi D
 	ret
 
 
+global string_copy
 string_copy:; takes pointer to destination in rdi and pointer to source in rsi
 	xor rax, rax
 
 	.copy_next_byte:
-	mov byte al, [rdi+rax]
-	mov [rsi+rax], al
+	mov byte cl, [rsi+rax]
+	test cl, cl
+	jz .exit
+	mov [rdi+rax], cl
 	inc rax
-	test rax,rax
-	jnz .copy_next_byte
+	jmp .copy_next_byte
+
+	.exit:
 	ret
 
 
+
+global memory_set
+memory_set:; takes destination in rdi, byte in sil and length in rdx
+	; void *memory_set(void *dst, unsigned char value, size_t length)
+	; Out:      rax = original destination, same as memset in libc
+	; Clobbers: rcx, rdx, rdi, r9, xmm8, flags
+	; Assumes the direction flag is clear, as the System V ABI guarantees on entry
+
+	mov r9, rdi; keep the original destination for the return value
+
+	; replicate the fill byte into all 8 bytes of rax (al feeds stosb, rax feeds xmm8)
+	movzx eax, sil
+	mov rcx, 0x0101010101010101
+	imul rax, rcx
+
+	; head: single bytes up to the next 16 byte boundary, never more than length
+	mov rcx, rdi
+	neg rcx
+	and ecx, 0xF; rcx = bytes missing to 16 byte alignment (0 if already aligned)
+	cmp rcx, rdx
+	cmova rcx, rdx; unsigned min, a short buffer can end before the boundary
+	sub rdx, rcx; rdx = bytes left after the head
+	rep stosb
+
+	; body: whole 16 byte chunks, rdi is aligned here whenever the count is not zero
+	mov rcx, rdx
+	shr rcx, 4; rcx = number of 128bit(16Byte) chunks
+	jz .write_tail; a zero count must not enter the loop, dec would wrap around
+
+	%ifdef AVX2
+	vmovq xmm8, rax; the AVX2 form of vpbroadcastq needs an xmm or memory source
+	vpbroadcastq xmm8, xmm8
+	%else
+	movq xmm8, rax
+	shufpd xmm8, xmm8, 0x00; copy the low qword into the high qword
+	%endif
+
+	.move_16_bytes:
+	movdqa [rdi], xmm8
+	add rdi, 16
+	dec rcx
+	jnz .move_16_bytes
+
+	.write_tail:; the remaining length % 16 bytes
+	mov rcx, rdx
+	and ecx, 0xF
+	rep stosb
+
+	mov rax, r9; return pointer to memory area same as memset in libc
+	ret