170 lines
3.0 KiB
NASM
170 lines
3.0 KiB
NASM
%include "symbols.asm"
|
|
|
|
section .text
|
|
|
|
global print_str
;-----------------------------------------------------------------------
; print_str
; Writes a NUL-terminated string with a single write syscall.
; In:    rdi = pointer to the NUL-terminated string
; Out:   rax = value returned by the syscall
; Saved: rsi, rdx (pushed on entry, popped before returning)
; Clobb: rdi (left holding STDOUT), rcx and r11 (overwritten by the
;        syscall instruction), flags
; Note:  SYS_WRITE and STDOUT are not defined in this block; presumably
;        they come from symbols.asm.
;-----------------------------------------------------------------------
print_str:
        push    rsi
        push    rdx

        mov     rsi, rdi                ; rsi = buffer argument for the syscall
        xor     edx, edx                ; rdx = running length, starts at 0
        jmp     .check_byte

.next_byte:
        inc     rdx
.check_byte:
        cmp     byte [rsi + rdx], 0     ; stop at the terminator
        jne     .next_byte

        ; rdx now holds the number of bytes before the terminator
        mov     rax, SYS_WRITE
        mov     rdi, STDOUT
        syscall

        pop     rdx
        pop     rsi
        ret
|
|
|
|
global unsigned_int_to_ascii
;-----------------------------------------------------------------------
; unsigned_int_to_ascii
; Converts an unsigned 64-bit integer to its decimal ASCII digits.
; In:    rdi = destination buffer (NOT bounds checked; up to 20 bytes
;              are written for a full 64-bit value)
;        rsi = value to convert
; Out:   rax = number of digits written
; Clobb: rcx, rdx, r11, flags
; Stack: pushes one qword per digit and pops them all before returning.
; Note:  no terminating NUL is stored after the digits.
;        ASCII_ZERO is not defined in this block; presumably it comes
;        from symbols.asm.
;-----------------------------------------------------------------------
unsigned_int_to_ascii:
        xor     r11d, r11d              ; r11 = digit count
        mov     ecx, 10                 ; rcx = divisor
        mov     rax, rsi                ; rax = remaining value

.count_loop:
        inc     r11
        cmp     rax, 10
        jb      .loop_count_exit        ; unsigned compare: values >= 2^63 must
                                        ; keep dividing, not look "negative"
        xor     edx, edx                ; rdx:rax is the dividend, so clear rdx
        div     rcx                     ; rax = quotient, rdx = next low digit
        push    rdx                     ; digits come out least significant first
        jmp     .count_loop

.loop_count_exit:
        push    rax                     ; most significant digit (value < 10)

        xor     ecx, ecx                ; rcx = index into destination

.store_loop:                            ; for (rcx = 0; rcx < r11; rcx++)
        cmp     rcx, r11
        jae     .loop_store_exit

        pop     rax                     ; popping reverses the order: MSD first
        add     rax, ASCII_ZERO
        mov     [rdi + rcx], al
        inc     rcx
        jmp     .store_loop

.loop_store_exit:
        mov     rax, r11                ; return the digit count
        ret
|
|
|
|
global string_copy
;-----------------------------------------------------------------------
; string_copy
; Copies the bytes of a NUL-terminated string into a destination buffer.
; In:    rdi = destination pointer
;        rsi = source pointer (NUL-terminated)
; Out:   rax = number of bytes copied
; Clobb: rcx (zero on return), flags
; Note:  the terminating NUL itself is NOT written to the destination,
;        and the destination is not bounds checked.
;-----------------------------------------------------------------------
string_copy:
        xor     eax, eax                ; rax = index / running length
        jmp     .fetch

.store:
        mov     [rdi + rax], cl
        inc     rax
.fetch:
        movzx   ecx, byte [rsi + rax]   ; rcx = next source byte, zero-extended
        test    cl, cl
        jnz     .store                  ; stop on the terminator without storing it

        ret
|
|
|
|
global memory_set
;-----------------------------------------------------------------------
; memory_set
; Fills a memory region with one byte value.
; In:    rdi = destination pointer
;        sil = fill byte
;        rdx = length in bytes
; Out:   rax = original destination pointer (same as libc memset)
; Clobb: rcx, rdx, rsi, rdi, r9, xmm8, flags
; Build: define AVX512 to broadcast the pattern with vpbroadcastq;
;        otherwise an SSE2 sequence is used.
;
; Strategy for len >= 16:
;   1. byte stores until rdi reaches a 16-byte boundary (0..15 bytes)
;   2. aligned 16-byte stores while at least 16 bytes remain
;   3. byte stores for the final 0..15 bytes
; Shorter fills use step 3 only.
; The rep stosb stores rely on the direction flag being clear.
;-----------------------------------------------------------------------
memory_set:
        mov     r9, rdi                 ; remember destination for the return value

        movzx   esi, sil                ; isolate the fill byte
        mov     rax, 0x0101010101010101
        imul    rax, rsi                ; rax = fill byte replicated into all 8 bytes
                                        ; (al is what rep stosb stores)

        cmp     rdx, 16
        jb      .tail                   ; unsigned: a length is never negative

        ; --- head: advance rdi to the next 16-byte boundary ---------------
        mov     ecx, edi
        neg     ecx
        and     ecx, 0xF                ; rcx = (-dst) & 15, 0 when already aligned
        sub     rdx, rcx                ; these bytes are written right now
        rep stosb                       ; no-op when rcx == 0

        ; --- body: aligned 16-byte stores ---------------------------------
        mov     rcx, rdx
        shr     rcx, 4                  ; rcx = number of whole 16-byte chunks
        jz      .tail                   ; head consumed everything but < 16 bytes

%ifdef AVX512
        vpbroadcastq xmm8, rax
%else
        movq    xmm8, rax
        punpcklqdq xmm8, xmm8           ; copy the low qword into the high qword
%endif

.move_16_bytes:
        movdqa  [rdi], xmm8             ; rdi is 16-byte aligned here
        add     rdi, 16
        dec     rcx
        jnz     .move_16_bytes

        and     edx, 0xF                ; rdx = bytes left after the chunks

        ; --- tail: remaining 0..15 bytes ----------------------------------
.tail:
        mov     rcx, rdx                ; full-width count: never leave stale
                                        ; upper bits in rcx before rep stosb
        rep stosb

        mov     rax, r9                 ; return the original destination
        ret
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
global memory_copy
;-----------------------------------------------------------------------
; memory_copy
; Copies a block of bytes from source to destination.
; In:    rdi = destination pointer
;        rsi = source pointer
;        rdx = length in bytes
; Out:   rax = original destination pointer (same as libc memcpy)
; Clobb: rcx, rsi, rdi, flags
; Note:  no alignment check is performed. The copy is done with
;        rep movsq / rep movsb, which rely on the direction flag being
;        clear; overlapping regions are not given any special handling.
;-----------------------------------------------------------------------
memory_copy:
        mov     rax, rdi                ; return value; the movs instructions
                                        ; below do not modify rax

        mov     rcx, rdx
        shr     rcx, 3                  ; rcx = number of whole 8-byte chunks
        rep movsq                       ; copy as many 8-byte chunks as possible

        mov     ecx, edx
        and     ecx, 0x7                ; rcx = remaining 0..7 bytes
        rep movsb                       ; copy the rest

        ret
|