Reviewer preamble (text above the first "diff --git" line is skipped by git apply / patch).

Reconstruction: this patch was recovered from a copy whose newlines and indentation had been
collapsed. Line breaks, 4-space indentation and the placement of blank context lines are a
best-effort rebuild. Hunk line counts are self-consistent, but context and removed lines must
be checked against the tree before applying.

Fixes folded into the added lines (hunk line counts unchanged):
  Makefile     the new AVX2 probe compared the grep output with "avx512", so -DAVX2 was never set.
  drawing.asm  check_if_hashtag passed its argument straight to cmp, which cannot encode "rsi-1",
               and both bounds tests were inverted; it now takes the address with lea and uses
               unsigned jb/jae.
               term_cols was loaded into r10w only, leaving the upper bits of r10 undefined before
               r10 is added to a 64-bit pointer; it is now zero-extended.
               The source/destination comments on the final memory_copy call were swapped.
  input.asm    termios: alignb 16 is not enough for the aligned ymm (32) / zmm (64) moves.
               The SSE fallback touched [termios+64], one past the 64-byte buffer, and used VEX
               encodings in the branch meant for CPUs without AVX2.
               The second unsigned_int_to_ascii call loaded only si; it now zero-extends.
               The board index used a 16-bit mul; it is now computed in 32 bits.
  str.asm      the new memory_copy body ("div 8", bare "mov") did not assemble; replaced by
               rep movsb.
  symbols.asm  POLLIN was byte-swapped to 0x0100; a 16-bit store of 1 is already correct on x86.

NOTE(review): the untouched avx512 probe compares against "avx512" while grep -o prints the
whole flag (for example "avx512f"); confirm whether -DAVX512 is ever set.
NOTE(review): "pop r12 / ret" after "jmp .main_loop" is unreachable in the lines shown; confirm
the exit path of the main loop.

diff --git a/Makefile b/Makefile
index c94c983..3c91744 100644
--- a/Makefile
+++ b/Makefile
@@ -8,6 +8,11 @@ DEBUG_LD_FLAGS := -g
 DEBUG_NASM_FLAGS := -g -F dwarf

 # check for avx2 support
+ifeq ($(shell grep -o 'avx2[^ ]*' /proc/cpuinfo | head -n 1),avx2)
+    NASM_FLAGS += -DAVX2
+endif
+
+# check for avx512 support
 ifeq ($(shell grep -o 'avx512[^ ]*' /proc/cpuinfo | head -n 1),avx512)
     NASM_FLAGS += -DAVX512
 endif
diff --git a/src/drawing.asm b/src/drawing.asm
index 3436be6..688db72 100644
--- a/src/drawing.asm
+++ b/src/drawing.asm
@@ -30,10 +30,23 @@ section .rodata

     start_str: db "START", 0
     stop_str: db "STOP ", 0
+    alive_switch_statement:; jump table for a live cell, indexed by its live-neighbour count
+    dq step_simulation.die; 0
+    dq step_simulation.die; 1
+    dq step_simulation.end_check; 2 (destination cell is left untouched)
+    dq step_simulation.end_check; 3 (destination cell is left untouched)
+    dq step_simulation.die; 4
+    dq step_simulation.die; 5
+    dq step_simulation.die; 6
+    dq step_simulation.die; 7
+    dq step_simulation.die; 8
+
+
 section .text
 extern print_str
 extern string_copy
 extern memory_set
+extern memory_copy
 extern alloc

 global init_gameboard
@@ -99,19 +112,20 @@ print_game_ui:

     ret

-%macro check_if_hashtag 1
-    cmp r8, %1
-    jl +7
-    cmp r9, %1
-    ja +5
-    mov r11b, [%1]
-    cmp r11b, '#'
-    jne +2
+%macro check_if_hashtag 2; arg 1 = address expression to test, arg 2 = unique suffix for the skip label
+    lea r11, [%1]; r11 = neighbour address, lea also accepts forms like rsi-1
+    cmp r11, r8
+    jb .no_count_%2; below r8, assumed to hold the lowest board address (set outside this hunk, confirm)
+    cmp r11, r9
+    jae .no_count_%2; r9 = board start + size, so r9 itself is already out of bounds
+    cmp byte [r11], '#'
+    jne .no_count_%2
     inc dl
+    .no_count_%2:
 %endmacro

 global step_simulation:
-step_simultion:
+step_simulation:
     mov rdi, [next_frame_ptr]; destination
     mov rsi, [gameboard_ptr]; source
     mov rcx, [gameboard_size]; number of iterations
@@ -120,48 +134,73 @@ step_simultion:
     mov r9, rsi
     add r9, rcx; store higest address posible so we are not checking out of bounds

-    mov r10, [term_cols]
+    movzx r10, word [term_cols]; zero-extend, r10 is added to 64-bit pointers below
     ;mov r11, [term_rows] this register has been confiscated since i cannot use ah because of error: cannot use high byte register in rex instruction

     xor rax, rax; this shouldn't be needed but just to be sure
     xor r11, r11
     xor rdx, rdx; we will use dl as # counter

     .for_every_column_on_gameboard:
-    mov al, [rdi]; NOTE to self if i need extra register i can shift this to ah and free up r11
+    xor dl, dl; restart the neighbour count for every cell
+    mov al, [rsi]; NOTE to self if i need extra register i can shift this to ah and free up r11

-    inc rdi
-    check_if_hashtag rdi
-    dec rdi
+    inc rsi
+    check_if_hashtag rsi, 1
+    dec rsi

-    check_if_hashtag rdi-1
+    check_if_hashtag rsi-1, 2

-    add rdi, r10
+    add rsi, r10

-    check_if_hashtag rdi
+    check_if_hashtag rsi, 3
+    inc rsi
+    check_if_hashtag rsi, 4
+    dec rsi
+
+    check_if_hashtag rsi-1, 5
+
+    sub rsi, r10
+
+
+    sub rsi, r10
+    check_if_hashtag rsi, 6
+
+    inc rsi
+    check_if_hashtag rsi, 7
+    dec rsi
+
+    check_if_hashtag rsi-1, 8
+
+    add rsi, r10
+
+    cmp al, '#'
+    jne .dead_cell
+
+    jmp [alive_switch_statement+(rdx*8)]; rdx = neighbour count, upper bits were zeroed before the loop
+
+    .die:
+    mov byte [rdi], 0x20; SPACE
+    jmp .end_check
+
+    .dead_cell:
+    cmp dl, 3
+    jne .end_check
+    mov byte [rdi], '#'; a dead cell with exactly three neighbours becomes alive
+
+    .end_check:
+    dec rcx

     inc rdi
-    check_if_hashtag rdi
-    dec rdi
+    inc rsi
+    test rcx, rcx
+    jnz .for_every_column_on_gameboard

-    check_if_hashtag rdi-1
+    mov rsi, [next_frame_ptr]; source: the frame that was just computed
+    mov rdi, [gameboard_ptr]; destination: the board
+    mov rdx, [gameboard_size]; number of bytes to copy

-    sub rdi, r10
-
-
-    sub rdi, r10
-    check_if_hashtag rdi
-
-    inc rdi
-    check_if_hashtag rdi
-    dec rdi
-
-    check_if_hashtag rdi-1
-
-    add rdi, r10
-
-
-    ; TODO create jump table
+    call memory_copy

     ret

diff --git a/src/input.asm b/src/input.asm
index 48826ad..167f5e5 100644
--- a/src/input.asm
+++ b/src/input.asm
@@ -1,8 +1,8 @@
 %include "symbols.asm"

 section .bss
-    cursor_rows: RESW 1; TODO DONT FORGET TO INICIALIZE
-    cursor_cols: RESW 1
+    alignb 64; the aligned ymm/zmm moves on termios need 32/64-byte alignment
+    termios: RESZ 1; 60 bytes are needed, 64 are reserved for alignment and easier handling

     extern multipurpuse_buf

@@ -12,13 +12,16 @@ section .bss
     extern gameboard_ptr
     extern simulation_running

-
+section .data
+    cursor_rows: dw 1
+    cursor_cols: dw 1

 section .rodata
     cursor_up: db ESC_CHAR, "[1A", 0
     cursor_down: db ESC_CHAR, "[1B", 0
     cursor_right: db ESC_CHAR, "[1C", 0
     cursor_left: db ESC_CHAR, "[1D", 0
+


     arrow_switch_statement:
@@ -30,37 +33,79 @@ section .rodata

 section .text
 extern print_str
+extern step_simulation
+extern unsigned_int_to_ascii
+extern print_game_ui


 global handle_user_input
 handle_user_input:; main loop of the program
+    push r12
+
+    lea r12, [multipurpuse_buf]

     .main_loop:
+    ; put the cursor where it should be: build ESC [ rows ; cols H in the buffer and print it
+    mov rdi, r12; multipurpuse_buf pointer is in r12
+    mov word [rdi], 0x5B1B; stores ESC_CHAR, '[' (bytes are reversed in the constant because of little endian)
+    add rdi, 2
+    push rdi
+    xor rsi, rsi
+    mov si, [cursor_rows]
+    call unsigned_int_to_ascii
+    pop rdi
+    add rdi, rax; add length of string to pointer
+    mov byte [rdi], ';'
+    inc rdi
+    push rdi
+    movzx esi, word [cursor_cols]; zero-extend, rsi is not guaranteed to survive the previous call
+    call unsigned_int_to_ascii
+    pop rdi
+    add rdi, rax
+    mov byte [rdi], 'H'
+    inc rdi
+    mov byte [rdi], 0; null terminate
+
+    mov rdi, r12; multipurpuse_buf pointer is in r12
+    call print_str
+
+
+
     xor rax, rax
-    mov qword [multipurpuse_buf], rax; zeroout the buffer
+    mov qword [r12], rax; zero out the buffer

     mov rax, SYS_POLL
-    mov rdi, STDIN
+    mov dword [r12], STDIN; create pollfd struct: fd at offset 0
+    mov word [r12+4], POLLIN; requested events at offset 4
+    mov rdi, r12
     mov rsi, 1; only one file descriptor is provided
-    mov rdx, 0; no timeout. maybe use this for final sleep but run if user inputs something TODO
+    mov rdx, 500; timeout in milliseconds, on timeout the loop continues at .no_input
     syscall

     test rax, rax; SYS_POLL returns 0 when no change happens within timeout
-    jz .no_input
+    jz .no_input
+
+    xor rax, rax
+    mov qword [r12], rax; zero out the buffer again, it still holds the pollfd struct

     mov rax, SYS_READ
     mov rdi, STDIN
-    lea rsi, [multipurpuse_buf]
+    lea rsi, [r12]
     mov rdx, 8; size of multipurpuse buffer
     syscall; read user input

-    mov rax, [multipurpuse_buf]
-    shr rax, 5; we need only 3 bytes for this inpus sceame
+    cmp rax, EAGAIN
+    je .no_input



-    cmp eax, 0x001B5B44; check if input is more than left arrow
-    ja .handle_single_byte_chars
+    mov rax, [r12]
+
+    cmp eax, 0x00415B1B; ESC, '[', 'A' read as a little-endian dword, anything below is not an arrow key
+    jl .handle_single_byte_chars

-    sub eax, 0x1B5B41
+    bswap eax
+
+    sub eax, 0x1B5B4100
+    shr eax, 8; eax = final letter minus 'A', used as the jump table index
     jmp [arrow_switch_statement+(rax*8)]; lets hope this works

     .arrow_up:
@@ -86,17 +131,20 @@ handle_user_input:; main loop of the program


     .handle_single_byte_chars:
-    shr eax, 2; get the char to al

     cmp al, 0xa; NEWLINE (enter key)
     jne .check_p

     xor rax, rax; zeroout rax
     mov ax, [cursor_rows]
-    mul dword [term_cols]
-    add rax, [cursor_cols]
+    dec eax; cursor rows start at 1
+    movzx ecx, word [term_cols]
+    imul eax, ecx; 32-bit product, a 16-bit mul overflows on boards above 65535 cells
+    movzx ecx, word [cursor_cols]
+    lea eax, [rax+rcx-1]; add the column, which also starts at 1

-    lea rdi, [gameboard_ptr+rax]
+    mov rdi, [gameboard_ptr]
+    add rdi, rax
     mov cl, [rdi]
     cmp cl, '#'
     je .hashtag_present
@@ -133,9 +181,80 @@ handle_user_input:; main loop of the program


     .no_input:
-
+    mov al, [simulation_running]
+    test al, al
+    jz .dont_step
+    call step_simulation
+    .dont_step:
+    call print_game_ui

     jmp .main_loop
+    pop r12
+    ret
+
+
+global disable_canonical_mode_and_echo
+disable_canonical_mode_and_echo:
+
+    mov rax, SYS_IOCTL
+    mov rdi, STDIN
+    mov rsi, TCGETS
+    lea rdx, [termios]
+    syscall
+
+    ; keep a copy of the original termios struct in vector registers
+    %ifdef AVX2
+        %ifdef AVX512
+            vmovdqa64 zmm0, [termios]
+        %else
+            vmovdqa ymm0, [termios]
+            vmovdqa ymm1, [termios+32]
+        %endif
+    %else
+        movdqa xmm0, [termios]
+        movdqa xmm1, [termios+16]
+        movdqa xmm2, [termios+32]
+        movdqa xmm3, [termios+48]
+    %endif
+
+
+    mov eax, [termios+12]; get c_lflag
+    and eax, NOT_ECHO; disable ECHO
+    and eax, NOT_ICANON; disable ICANON
+    mov [termios+12], eax
+
+    mov rax, SYS_IOCTL
+    mov rdi, STDIN
+    mov rsi, TCSETS
+    lea rdx, [termios]
+    syscall
+
+
+    ; write the saved original back to memory, reset_terminal passes it to TCSETS later
+    %ifdef AVX2
+        %ifdef AVX512
+            vmovdqa64 [termios], zmm0
+        %else
+            vmovdqa [termios], ymm0
+            vmovdqa [termios+32], ymm1
+        %endif
+    %else
+        movdqa [termios], xmm0
+        movdqa [termios+16], xmm1
+        movdqa [termios+32], xmm2
+        movdqa [termios+48], xmm3
+    %endif
+
+
+    ret
+
+global reset_terminal
+reset_terminal:
+    mov rax, SYS_IOCTL
+    mov rdi, STDIN
+    mov rsi, TCSETS
+    lea rdx, [termios]
+    syscall


     ret
diff --git a/src/main.asm b/src/main.asm
index d46a2dd..7215f10 100644
--- a/src/main.asm
+++ b/src/main.asm
@@ -33,6 +33,11 @@ extern alloc
 extern init_gameboard
 extern print_game_ui

+extern handle_user_input
+
+extern disable_canonical_mode_and_echo
+extern reset_terminal
+
 global _start
 _start:
     ; get terminal dimensions
@@ -82,7 +87,11 @@ _start:


     call print_game_ui
+    call disable_canonical_mode_and_echo

+    call handle_user_input
+
+    call reset_terminal



diff --git a/src/str.asm b/src/str.asm
index 91ae360..393aebe 100644
--- a/src/str.asm
+++ b/src/str.asm
@@ -24,7 +24,7 @@ print_str: ; takes pointer to string in rdi and retuns in rax
     ret

 global unsigned_int_to_ascii
-unsigned_int_to_ascii: ; takes pointer to array in rdi and value stored in rsi DOES NOT BOUNDS CHECK
+unsigned_int_to_ascii: ; takes pointer to array in rdi and value stored in rsi, DOES NOT BOUNDS CHECK, returns length of string in rax
     xor r11, r11
     mov rcx, 10
     mov rax, rsi
@@ -154,52 +154,10 @@ memory_copy:; takes destination in rdi, source in rsi and lenght in rdx

     mov r9, rdi

-    cmp rdx, 16
-    jnl .write_16_or_more_bytes
-    mov rcx, rdx
-    jmp .write_less_than_16_bytes
-    .write_16_or_more_bytes:
-    mov rax, rdi; move destination to rax
-    and rax, 0xF; offset is stored in rax
+    ; plain byte-wise copy, a length of 0 copies nothing
+    mov rcx, rdx; rcx = number of bytes for rep movsb

-
-    test al, al; check if resault is 0
-    jz .addr_is_16_Byte_alligned
-
+    rep movsb; copies rcx bytes from [rsi] to [rdi]

-    mov cl, 16
-    sub cl, al; now offset to first higher 16 byte alligned address is stored in r8
-    movzx rcx, cl; remove ani posible garbage
-
-
-    .write_less_than_16_bytes:
-    sub rdx, rcx; we will write these bytes now
-
-    rep movsb
-
-    .addr_is_16_Byte_alligned:
-    mov r10, rdx
-    shr r10, 4; set it to how many 128bit(16Byte) chunk we need
-    test r10, r10; check if we need to write aditional 16 bytes at all
-    jz .function_exit
-
-    .move_16_bytes:
-    movdqa xmm8, [rsi]
-    movdqa [rdi], xmm8
-    add rdi, 16
-    add rsi, 16
-    sub rdx, 16
-
-    cmp rdx, 16; test if rdx is less than 16
-    jge .move_16_bytes
-
-    .function_exit:
-
-    test rdx, rdx; test if rdx is 0
-    jz .true_function_exit
-    movzx rcx, dl
-    jmp .write_less_than_16_bytes
-
-    .true_function_exit:
     mov rax, r9; return pointer to memory area same as memset in libc
     ret
diff --git a/src/symbols.asm b/src/symbols.asm
index b34649b..176527d 100644
--- a/src/symbols.asm
+++ b/src/symbols.asm
@@ -10,9 +10,16 @@ STDIN equ 0
 STDOUT equ 1
 TIOCGWINSZ equ 0x5413

-POLLIN equ 1
+TCGETS equ 0x5401
+TCSETS equ 0x5402
 F_SETFL equ 4
 O_NONBLOCK equ 2048
+POLLIN equ 0x0001; written with a 16-bit mov, so no manual byte swap is needed on x86
+
+NOT_ECHO equ -9
+NOT_ICANON equ -3
+
+EAGAIN equ -11

 ASCII_ZERO equ 48
 ESC_CHAR equ 27