From d8dc35c0834d2b9948c3ec39801b86f76707f24b Mon Sep 17 00:00:00 2001
From: flysand7
Date: Wed, 30 Aug 2023 19:52:29 +1100
Subject: [PATCH] switch stacks

---
 arch/sysv_x86-64/loader-trampoline.asm | 55 ++++++++++++++++++++++
 build.py                               |  3 +-
 loader/loader-self-reloc.c             | 18 ++++++--
 loader/loader.c                        | 63 ++++++++++++++++++++------
 4 files changed, 120 insertions(+), 19 deletions(-)
 create mode 100644 arch/sysv_x86-64/loader-trampoline.asm

diff --git a/arch/sysv_x86-64/loader-trampoline.asm b/arch/sysv_x86-64/loader-trampoline.asm
new file mode 100644
index 0000000..7410fa4
--- /dev/null
+++ b/arch/sysv_x86-64/loader-trampoline.asm
@@ -0,0 +1,55 @@
+
+bits 64
+
+section .text
+global ld_stack_trampoline
+
+; DESCRIPTION:
+;   Copies the live part of the current stack (everything from RSP up to the
+;   top of the source stack) to the top of the destination region, switches
+;   RSP to that copy and calls the function in R8 with (1, R9). If the live
+;   part does not fit into the destination, calls R8 with (0, R9) instead.
+; PARAMS:
+;   RDI (u64) - dst stack base
+;   RSI (u64) - src stack base
+;   RDX (u64) - dst stack size
+;   RCX (u64) - src stack size
+;   R8  (u64) - function to call
+;   R9  (u64) - param to pass to function
+; RETURNS:
+;   Never; the called function must not return (we trap if it does)
+ld_stack_trampoline:
+    ; Number of live bytes to move: (src base + src size) - rsp
+    add rcx, rsi
+    sub rcx, rsp
+    ; The live part of the source stack starts at the current rsp
+    mov rsi, rsp
+    ; Sizes are unsigned: go on only if the live part is smaller than dst
+    cmp rcx, rdx
+    jb .continue
+    ; It does not fit: report failure on the old stack. rsp is 8 off a
+    ; 16-byte boundary at function entry, so realign before the call
+    sub rsp, 8
+    xor rdi, rdi
+    mov rsi, r9
+    mov rax, r8
+    call rax
+    ud2
+.continue:
+    ; Place the copy at the top of dst: rdi = dst base + dst size - bytes
+    add rdi, rdx
+    sub rdi, rcx
+    ; rep movsq advances rdi, so remember where the copy begins
+    mov rax, rdi
+    ; Copy in 8-byte units; the SysV ABI guarantees DF=0 on function entry
+    shr rcx, 3
+    rep movsq
+    ; Switch to the new stack; rsp points at the copy of what it pointed at
+    mov rsp, rax
+    ; Align the stack for the call and report success
+    and rsp, -16
+    mov rdi, 1
+    mov rsi, r9
+    mov rax, r8
+    call rax
+    ud2
\ No newline at end of file
diff --git a/build.py b/build.py
index
bcf1ba5..12c2156 100755
--- a/build.py
+++ b/build.py
@@ -144,7 +144,8 @@ if loader_debugging_enabled:
     cc_defines.append('_CIA_LD_DEBUG')
 print_step("Building lib/ld-cia.so\n")
 assemble_obj('bin/loader-entry.o', [f'arch/{target_abi}_{target_arch}/loader-entry.asm'], ['-f "elf64"'])
-compile_shared('lib/ld-cia.so', ['bin/loader-entry.o','loader/loader-self-reloc.c','loader/loader.c'])
+assemble_obj('bin/loader-trampoline.o', [f'arch/{target_abi}_{target_arch}/loader-trampoline.asm'], ['-f "elf64"'])  # stack-switch trampoline, linked into ld-cia.so below
+compile_shared('lib/ld-cia.so', ['bin/loader-entry.o','loader/loader-self-reloc.c','loader/loader.c','bin/loader-trampoline.o'])
 cc_defines_pop()
 cc_flags_pop()
 
diff --git a/loader/loader-self-reloc.c b/loader/loader-self-reloc.c
index 4d394bd..902cf8f 100644
--- a/loader/loader-self-reloc.c
+++ b/loader/loader-self-reloc.c
@@ -14,18 +14,27 @@
 // are fine with gcc and clang (I think))
 
 #include
+#include
 #include
 #include
-#include
+#include
+#include
+#include
+#include
+
 #include "loader.h"
 
-extern void loader_entry(Loader_Info *ld_info);
+extern void ld_stage2_entry(Loader_Info *ld_info); // stage 2, defined in loader/loader.c
 
-void _dlstart_reloc_c(u64 *sp, Elf64_Dyn *dynv) {
+void ld_stage1_entry(u64 *sp, Elf64_Dyn *dynv) {
     _dbg_print_string("Entered dynamic loader\n");
     // Find argc, argv in stack
     int argc = *sp;
     char **argv = (void *)(sp+1);
+    _dbg_printf("ARGV:\n"); // debug dump of the argument vector found above
+    for(int i = 0; i < argc; ++i) {
+        _dbg_printf("[%d]: %s\n", (i64)i, argv[i]); // NOTE(review): the cast suggests _dbg_printf's %d takes a 64-bit value - confirm
+    }
     // Skip over environment
     char **envp = argv+argc+1;
     int envc = 0;
@@ -173,7 +182,6 @@ void _dlstart_reloc_c(u64 *sp, Elf64_Dyn *dynv) {
             }
             rela_offs += rela_ent;
         }
-
     }
     _mfence();
     Loader_Info ld_info;
@@ -182,7 +190,7 @@ void _dlstart_reloc_c(u64 *sp, Elf64_Dyn *dynv) {
     ld_info.dyn = dyn;
     ld_info.aux = aux;
     _dbg_printf("Self-relocation finished. Entering the loader\n");
-    loader_entry(&ld_info);
+    ld_stage2_entry(&ld_info);
     sys_exit(0);
 }
 
diff --git a/loader/loader.c b/loader/loader.c
index c406d93..e3917ff 100644
--- a/loader/loader.c
+++ b/loader/loader.c
@@ -39,6 +39,7 @@ struct Elf_Image {
 #define elf_addr(elf, off) (void *)((elf)->base + (u64)off)
 
 static Cia_Pool image_pool;
+static Cia_Arena tmp_arena; // backs the Stage3_Info_Struct handed to stage 3
 
 static u32 elf_sym_gnu_hash(char *name) {
     unsigned char *s = (void *)name;
@@ -91,7 +92,24 @@ static Elf64_Sym *elf_symbol_by_name(Elf_Image *image, char *name) {
     return NULL;
 }
 
-void loader_entry(Loader_Info *ld_info) {
+struct Stage3_Info_Struct typedef Stage3_Info_Struct;
+struct Stage3_Info_Struct { // state carried from stage 2 to stage 3 across the stack switch
+    Elf_Image *app;  // image whose ELF entry point stage 3 calls
+    Elf_Image *ldso; // image set up from the loader's own base and dynamic section
+};
+
+static void ld_stage3_entry(u64 has_new_stack, void *ctx);
+
+void ld_stack_trampoline( // implemented in arch/<abi>_<arch>/loader-trampoline.asm
+      void *stack_base      // base of the region that becomes the new stack
+    , void *old_stack_base  // base of the stack currently in use
+    , u64 stack_size        // size of the new region, in bytes
+    , u64 old_stack_size    // size of the current stack, in bytes
+    , void (*fn)(u64 has_new_stack, void *ctx) // called with 1 after the switch, 0 if it was refused
+    , void *ctx             // passed through to fn unchanged
+);
+
+void ld_stage2_entry(Loader_Info *ld_info) {
     _dbg_printf("Loader entry point reached!\n");
     // Get our loader data back
     u64 *sp = ld_info->sp;
@@ -101,7 +119,11 @@ void loader_entry(Loader_Info *ld_info) {
     cia_pool_create(&image_pool, cia_allocator_pages(), 1*KB, sizeof(Elf_Image), 0x10);
     Elf_Image *ldso = cia_pool_alloc(&image_pool);
     Elf_Image *app = cia_pool_alloc(&image_pool);
-    cia_arena_create(&app->arena, cia_allocator_pages(), 1*MB);
+    // Stash the image pointers for stage 3 in tmp_arena (not on the stack being replaced)
+    cia_arena_create(&tmp_arena, cia_allocator_pages(), 1*MB);
+    Stage3_Info_Struct *stage3 = cia_arena_alloc(&tmp_arena, sizeof(Stage3_Info_Struct));
+    stage3->app = app;
+    stage3->ldso = ldso;
     ldso->base = ldso_base;
     ldso->dyn = dyn;
     // Read ldso elf header
@@ -150,14 +172,6 @@ void loader_entry(Loader_Info *ld_info) {
     if(fd != 0) {
         sys_close(fd);
     }
-    // Get the information about the main thread stack
-    if(linux_read_stack_info()) {
-        printf("ERROR: failed to read /proc/self/maps to get the stack info\n");
-        sys_exit(1);
-    }
-    _dbg_printf("Received stack: %x-%x\n", stack_info.start_addr, stack_info.end_addr);
-    u64 *ptr = (void *)stack_info.start_addr;
-    // *ptr = 1245;
     // Find .dynamic section
     {
         u8 *phdr = (void *)aux[AT_PHDR];
@@ -281,9 +295,32 @@ void loader_entry(Loader_Info *ld_info) {
         // Get the app main
         // Elf64_Sym *app_main = elf_symbol_by_name(app, "main");
         // _dbg_printf("app main: %x\n", app_main);
-        void (*crt_entry)() = elf_addr(app, eh->e_entry);
-        _dbg_printf("Exiting the dynamic loader, trying to enter the main app\n");
-        crt_entry();
+        // Get the information about the main thread stack
+        if(linux_read_stack_info()) {
+            printf("ERROR: failed to read /proc/self/maps to get the stack info\n");
+            sys_exit(1);
+        }
+        _dbg_printf("Found default stack at: %x-%x\n", stack_info.start_addr, stack_info.end_addr);
+        void *old_stack_base = (void *)stack_info.start_addr;
+        u64 old_stack_size = (u64)stack_info.end_addr - (u64)stack_info.start_addr;
+        u64 stack_size = 0x10000; // 64 KiB for the replacement stack
+        void *stack_base = sys_mmap(0, stack_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); // NOTE(review): result is used without a failure check
+        _dbg_printf("stage3 info struct: %x\n", stage3);
+        // Hand control to ld_stage3_entry through the stack trampoline
+        ld_stack_trampoline(stack_base, old_stack_base, stack_size, old_stack_size, &ld_stage3_entry, stage3);
     }
     sys_exit(0);
 }
+
+static void ld_stage3_entry(u64 has_new_stack, void *ctx) { // stage 3: invoked by ld_stack_trampoline; ctx is the Stage3_Info_Struct from stage 2
+    if(!has_new_stack) { // the trampoline passes 0 when it did not switch stacks
+        printf("ERROR: failed to switch the stack\n");
+        sys_exit(1);
+    }
+    Stage3_Info_Struct *info = ctx;
+    _dbg_printf("Entered loader stage 3. Try entering main executable\n");
+    _dbg_printf("stage3 info struct: %x\n", info);
+    void (*crt_entry)() = elf_addr(info->app, ((Elf64_Ehdr *)info->app->base)->e_entry); // app base + e_entry read from the ELF header at that base
+    _dbg_printf("Entry at: %x\n", crt_entry);
+    crt_entry();
+}