diff --git a/.gitignore b/.gitignore index 173fc0f..ae0b1dc 100644 --- a/.gitignore +++ b/.gitignore @@ -29,4 +29,5 @@ limine **/.DS_Store .DS_Store /build/ -*.o \ No newline at end of file +*.o +disk.img \ No newline at end of file diff --git a/disk.img b/disk.img index b8bc54d..6aaaf5c 100644 Binary files a/disk.img and b/disk.img differ diff --git a/src/arch/interrupts.asm b/src/arch/interrupts.asm index 717a369..1b4dd74 100644 --- a/src/arch/interrupts.asm +++ b/src/arch/interrupts.asm @@ -45,7 +45,11 @@ isr%2_wrapper: push r14 push r15 - ; Save SSE/FPU state (fxsave requires 16-byte alignment) + test qword [rsp + 144], 3 + jz %%skip_swap + swapgs +%%skip_swap: + sub rsp, 512 fxsave [rsp] @@ -76,6 +80,12 @@ isr%2_wrapper: pop rcx pop rbx pop rax + + test qword [rsp + 24], 3 + jz %%skip_swap_back + swapgs +%%skip_swap_back: + add rsp, 16 ; drop dummy vector and error code iretq %endmacro @@ -163,8 +173,12 @@ exception_common: push r13 push r14 push r15 + + test qword [rsp + 144], 3 + jz .skip_swap_exc + swapgs +.skip_swap_exc: - ; Save SSE/FPU state (fxsave requires 16-byte alignment) sub rsp, 512 fxsave [rsp] @@ -196,6 +210,12 @@ exception_common: pop rcx pop rbx pop rax + + test qword [rsp + 24], 3 + jz .skip_swap_back_exc + swapgs +.skip_swap_back_exc: + add rsp, 16 ; drop vector and error code iretq diff --git a/src/arch/syscalls.asm b/src/arch/syscalls.asm index 0f3f3b5..3fcf49e 100644 --- a/src/arch/syscalls.asm +++ b/src/arch/syscalls.asm @@ -15,15 +15,14 @@ section .text ; R9 = arg5 syscall_entry: - ; 1. Switch to Kernel Stack safely - ; Note: For true SMP safety, we need per-CPU storage (via swapgs). - ; For now, we use a global scratch which is only safe because we mask interrupts on entry. - mov [rel user_rsp_scratch], rsp - mov rsp, [rel kernel_syscall_stack] + swapgs + + mov [gs:40], rsp + mov rsp, [gs:48] - ; 2. Build iretq frame (compatible with registers_t) + ; 2. Build iretq frame push 0x1B ; SS (User Data) - push qword [rel user_rsp_scratch] ; RSP + push qword [gs:40] ; RSP push r11 ; RFLAGS (captured by syscall) push 0x23 ; CS (User Code) push rcx ; RIP (return address from syscall) @@ -81,14 +80,7 @@ syscall_entry: pop rax add rsp, 16 ; drop int_no/err_code - ; Debug: check RIP before iretq - ; We can't easily print from here without destroying registers, - ; but we can at least check if it's canonical. - + swapgs iretq section .bss -global kernel_syscall_stack -global user_rsp_scratch -kernel_syscall_stack: resq 1 -user_rsp_scratch: resq 1 diff --git a/src/core/main.c b/src/core/main.c index 6657a06..69ff52c 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -158,6 +158,8 @@ void kmain(void) { // The memory manager will now scan the memory map and manage all usable regions. memory_manager_init_from_memmap(memmap_request.response); serial_write("[DEBUG] memory_manager_init OK\n"); + smp_init_bsp(); + serial_write("[DEBUG] smp_init_bsp OK\n"); } else { serial_write("[DEBUG] ERROR: No usable memory for heap! Check Limine memmap.\n"); hcf(); diff --git a/src/fs/fat32.c b/src/fs/fat32.c index f09e684..349ab21 100644 --- a/src/fs/fat32.c +++ b/src/fs/fat32.c @@ -1561,15 +1561,11 @@ static int vfs_realfs_read(void *fs_private, void *file_handle, void *buf, int s uint8_t *cluster_buf = (uint8_t*)kmalloc(cluster_size); if (!cluster_buf) return -1; - extern void serial_write(const char*); - serial_write("[VFS] vfs_realfs_read enter\n"); - uint64_t rflags = spinlock_acquire_irqsave(&vol->lock); int ret = realfs_read_file(handle, buf, size, cluster_buf); spinlock_release_irqrestore(&vol->lock, rflags); kfree(cluster_buf); - serial_write("[VFS] vfs_realfs_read exit\n"); return ret; } @@ -1583,15 +1579,11 @@ static int vfs_realfs_write(void *fs_private, void *file_handle, const void *buf uint8_t *cluster_buf = (uint8_t*)kmalloc(cluster_size); if (!cluster_buf) return -1; - extern void serial_write(const char*); - serial_write("[VFS] vfs_realfs_write enter\n"); - uint64_t rflags = spinlock_acquire_irqsave(&vol->lock); int ret = realfs_write_file(handle, buf, size, cluster_buf); spinlock_release_irqrestore(&vol->lock, rflags); kfree(cluster_buf); - serial_write("[VFS] vfs_realfs_write exit\n"); return ret; } diff --git a/src/sys/process.c b/src/sys/process.c index 7636c7a..23973de 100644 --- a/src/sys/process.c +++ b/src/sys/process.c @@ -23,10 +23,10 @@ process_t processes[MAX_PROCESSES] __attribute__((aligned(16))); int process_count = 0; static process_t* current_process[MAX_CPUS_SCHED] = {0}; // Per-CPU static uint32_t next_pid = 0; -static void *free_kernel_stack_later = NULL; -static uint64_t free_pml4_later = 0; +static void *free_kernel_stack_later[MAX_CPUS_SCHED] = {0}; +static uint64_t free_pml4_later[MAX_CPUS_SCHED] = {0}; static spinlock_t runqueue_lock = SPINLOCK_INIT; -static uint32_t next_cpu_assign = 1; // Round-robin CPU assignment (start from CPU 1) +static uint32_t next_cpu_assign = 1; void process_init(void) { for (int i = 0; i < MAX_PROCESSES; i++) { @@ -376,21 +376,35 @@ process_t* process_get_current(void) { } uint64_t process_schedule(uint64_t current_rsp) { - if (free_kernel_stack_later) { - kfree(free_kernel_stack_later); - free_kernel_stack_later = NULL; + uint32_t my_cpu = smp_this_cpu_id(); + uint64_t rflags = spinlock_acquire_irqsave(&runqueue_lock); + + void *cleanup_stack = NULL; + uint64_t cleanup_pml4 = 0; + + if (free_kernel_stack_later[my_cpu]) { + cleanup_stack = free_kernel_stack_later[my_cpu]; + free_kernel_stack_later[my_cpu] = NULL; } - if (free_pml4_later) { - extern void paging_destroy_user_pml4_phys(uint64_t pml4_phys); - paging_destroy_user_pml4_phys(free_pml4_later); - free_pml4_later = 0; + if (free_pml4_later[my_cpu]) { + cleanup_pml4 = free_pml4_later[my_cpu]; + free_pml4_later[my_cpu] = 0; } - uint32_t my_cpu = smp_this_cpu_id(); process_t *cur = current_process[my_cpu]; - if (!cur || !cur->next || cur == cur->next) + if (!cur || !cur->next || cur == cur->next) { + spinlock_release_irqrestore(&runqueue_lock, rflags); + + // Perform cleanup outside the lock + if (cleanup_stack) kfree(cleanup_stack); + if (cleanup_pml4) { + extern void paging_destroy_user_pml4_phys(uint64_t pml4_phys); + paging_destroy_user_pml4_phys(cleanup_pml4); + } + return current_rsp; + } // Save context cur->rsp = current_rsp; @@ -412,11 +426,8 @@ uint64_t process_schedule(uint64_t current_rsp) { next_proc = next_proc->next; } - // If we didn't find a ready process for our CPU, stay on current (unless we are terminated) if (next_proc->cpu_affinity != my_cpu || next_proc->pid == 0xFFFFFFFF) { - // Fallback to idle if current is terminated if (cur && cur->pid == 0xFFFFFFFF) { - // Find the idle process for this CPU for (int i = 0; i < MAX_PROCESSES; i++) { if (processes[i].pid == 0 || (processes[i].cpu_affinity == my_cpu && processes[i].is_user == false)) { next_proc = &processes[i]; @@ -424,18 +435,25 @@ uint64_t process_schedule(uint64_t current_rsp) { } } } else { + spinlock_release_irqrestore(&runqueue_lock, rflags); + + if (cleanup_stack) kfree(cleanup_stack); + if (cleanup_pml4) { + extern void paging_destroy_user_pml4_phys(uint64_t pml4_phys); + paging_destroy_user_pml4_phys(cleanup_pml4); + } + return current_rsp; } } current_process[my_cpu] = next_proc; - // Update Kernel Stack for User Mode interrupts and System Calls if (current_process[my_cpu]->is_user && current_process[my_cpu]->kernel_stack) { tss_set_stack_cpu(my_cpu, current_process[my_cpu]->kernel_stack); - if (my_cpu == 0) { - extern uint64_t kernel_syscall_stack; - kernel_syscall_stack = current_process[my_cpu]->kernel_stack; + cpu_state_t *cpu_state = smp_get_cpu(my_cpu); + if (cpu_state) { + cpu_state->kernel_syscall_stack = current_process[my_cpu]->kernel_stack; } } @@ -443,8 +461,16 @@ uint64_t process_schedule(uint64_t current_rsp) { paging_switch_directory(current_process[my_cpu]->pml4_phys); current_process[my_cpu]->ticks++; + uint64_t next_rsp = current_process[my_cpu]->rsp; - return current_process[my_cpu]->rsp; + spinlock_release_irqrestore(&runqueue_lock, rflags); + if (cleanup_stack) kfree(cleanup_stack); + if (cleanup_pml4) { + extern void paging_destroy_user_pml4_phys(uint64_t pml4_phys); + paging_destroy_user_pml4_phys(cleanup_pml4); + } + + return next_rsp; } process_t* process_get_by_pid(uint32_t pid) { @@ -600,25 +626,22 @@ uint64_t process_terminate_current(void) { // 4. Load context for the NEXT process if (current_process[my_cpu]->is_user && current_process[my_cpu]->kernel_stack) { tss_set_stack_cpu(my_cpu, current_process[my_cpu]->kernel_stack); - if (my_cpu == 0) { - extern uint64_t kernel_syscall_stack; - kernel_syscall_stack = current_process[my_cpu]->kernel_stack; + cpu_state_t *cpu_state = smp_get_cpu(my_cpu); + if (cpu_state) { + cpu_state->kernel_syscall_stack = current_process[my_cpu]->kernel_stack; } } paging_switch_directory(current_process[my_cpu]->pml4_phys); - // 5. Free memory - if (to_delete->user_stack_alloc) kfree(to_delete->user_stack_alloc); - - extern void paging_destroy_user_pml4_phys(uint64_t pml4_phys); - if (to_delete->pml4_phys && to_delete->is_user) { - paging_destroy_user_pml4_phys(to_delete->pml4_phys); + + kfree(to_delete->user_stack_alloc); + to_delete->user_stack_alloc = NULL; } - - to_delete->user_stack_alloc = NULL; - free_kernel_stack_later = to_delete->kernel_stack_alloc; + + free_kernel_stack_later[my_cpu] = to_delete->kernel_stack_alloc; to_delete->kernel_stack_alloc = NULL; + free_pml4_later[my_cpu] = to_delete->pml4_phys; to_delete->pml4_phys = 0; uint64_t next_rsp = current_process[my_cpu]->rsp; @@ -666,4 +689,3 @@ process_t* process_get_by_ui_window(void *win) { } return NULL; } - diff --git a/src/sys/smp.c b/src/sys/smp.c index 026c817..e676a6f 100644 --- a/src/sys/smp.c +++ b/src/sys/smp.c @@ -18,6 +18,16 @@ extern void serial_write_hex(uint64_t n); static cpu_state_t *cpu_states = NULL; static uint32_t total_cpus = 0; static uint32_t bsp_lapic_id = 0; +static cpu_state_t *bsp_cpu_state = NULL; + +#define MSR_GS_BASE 0xC0000101 +#define MSR_KERNEL_GS_BASE 0xC0000102 + +static inline void wrmsr(uint32_t msr, uint64_t value) { + uint32_t low = (uint32_t)value; + uint32_t high = (uint32_t)(value >> 32); + asm volatile("wrmsr" : : "c"(msr), "a"(low), "d"(high)); +} static uint32_t read_lapic_id(void) { uint32_t eax, ebx, ecx, edx; @@ -27,6 +37,12 @@ static uint32_t read_lapic_id(void) { uint32_t smp_this_cpu_id(void) { if (total_cpus <= 1) return 0; + + // Use GS-based self-pointer to get the structure first + cpu_state_t *state; + asm volatile("movq %%gs:0, %0" : "=r"(state) : : "memory"); + if (state) return state->cpu_id; + uint32_t lapic = read_lapic_id(); for (uint32_t i = 0; i < total_cpus; i++) { if (cpu_states[i].lapic_id == lapic) return i; @@ -68,13 +84,21 @@ static void ap_entry(struct limine_smp_info *info) { extern void idt_load(void); idt_load(); + extern void syscall_init(void); + syscall_init(); + uint64_t kernel_cr3 = paging_get_pml4_phys(); asm volatile("mov %0, %%cr3" : : "r"(kernel_cr3)); extern void lapic_enable(void); lapic_enable(); + cpu_states[my_id].self = &cpu_states[my_id]; cpu_states[my_id].online = true; + cpu_states[my_id].kernel_syscall_stack = cpu_states[my_id].kernel_stack; + + wrmsr(MSR_GS_BASE, (uint64_t)&cpu_states[my_id]); + wrmsr(MSR_KERNEL_GS_BASE, (uint64_t)&cpu_states[my_id]); serial_write("[SMP] AP "); serial_write_num(my_id); @@ -90,6 +114,19 @@ static void ap_entry(struct limine_smp_info *info) { work_queue_drain_loop(); } +void smp_init_bsp(void) { + static cpu_state_t bsp_state_static = {0}; + bsp_state_static.cpu_id = 0; + bsp_state_static.lapic_id = read_lapic_id(); + bsp_state_static.self = &bsp_state_static; + bsp_state_static.online = true; + + wrmsr(MSR_GS_BASE, (uint64_t)&bsp_state_static); + wrmsr(MSR_KERNEL_GS_BASE, (uint64_t)&bsp_state_static); + + bsp_cpu_state = &bsp_state_static; +} + // --- SMP Initialization --- uint32_t smp_init(struct limine_smp_response *smp_resp) { if (!smp_resp || smp_resp->cpu_count <= 1) { @@ -132,8 +169,15 @@ uint32_t smp_init(struct limine_smp_response *smp_resp) { cpu_states[i].lapic_id = cpu->lapic_id; if (cpu->lapic_id == bsp_lapic_id) { - cpu_states[i].online = true; + cpu_states[i] = *bsp_cpu_state; // Copy early BSP state + cpu_states[i].self = &cpu_states[i]; + + cpu_states[i].kernel_stack = 0; // Limine stack for now + cpu_states[i].kernel_syscall_stack = 0; bsp_index = i; + wrmsr(MSR_GS_BASE, (uint64_t)&cpu_states[i]); + wrmsr(MSR_KERNEL_GS_BASE, (uint64_t)&cpu_states[i]); + serial_write("[SMP] BSP CPU "); serial_write_num(i); serial_write(" (LAPIC "); diff --git a/src/sys/smp.h b/src/sys/smp.h index 62b8b4d..c4ae47e 100644 --- a/src/sys/smp.h +++ b/src/sys/smp.h @@ -8,29 +8,27 @@ #include #include "spinlock.h" -// Per-CPU state. Dynamically allocated at boot based on actual CPU count. typedef struct cpu_state { - uint32_t cpu_id; // Logical CPU index (0 = BSP) - uint32_t lapic_id; // Local APIC ID from Limine - uint64_t kernel_stack; // Top of kernel stack for this CPU - void *kernel_stack_alloc; // Base allocation for kfree - volatile bool online; // True once AP is fully initialized + struct cpu_state *self; + uint32_t cpu_id; + uint32_t lapic_id; + uint64_t kernel_stack; + void *kernel_stack_alloc; + volatile bool online; + uint64_t user_rsp_scratch; + uint64_t kernel_syscall_stack; } cpu_state_t; + + void smp_init_bsp(void); + -// Initialize SMP — call after GDT/IDT/memory init but before wm_init. -// Pass the Limine SMP response. APs will be started and will enter their -// idle loops. Returns the number of CPUs brought online. struct limine_smp_response; uint32_t smp_init(struct limine_smp_response *smp_resp); -// Get the current CPU index (0 = BSP). Uses CPUID to read LAPIC ID, -// then looks up in the cpu table. uint32_t smp_this_cpu_id(void); -// Total number of CPUs online. uint32_t smp_cpu_count(void); -// Get per-CPU state by index. cpu_state_t *smp_get_cpu(uint32_t cpu_id); #endif diff --git a/src/sys/syscall.c b/src/sys/syscall.c index 2f19cb5..74eae87 100644 --- a/src/sys/syscall.c +++ b/src/sys/syscall.c @@ -77,6 +77,14 @@ static void smp_user_wrapper(void *arg) { } void syscall_init(void) { + uint64_t efer = rdmsr(MSR_EFER); + efer |= 1; + wrmsr(MSR_EFER, efer); + uint64_t star = ((uint64_t)0x001B << 48) | ((uint64_t)0x0008 << 32); + wrmsr(MSR_STAR, star); + extern void syscall_entry(void); + wrmsr(MSR_LSTAR, (uint64_t)syscall_entry); + wrmsr(MSR_FMASK, 0x200); } static void user_window_close(Window *win) { diff --git a/src/wm/explorer.c b/src/wm/explorer.c index 49abe25..73a1971 100644 --- a/src/wm/explorer.c +++ b/src/wm/explorer.c @@ -710,12 +710,7 @@ static void explorer_load_directory(Window *win, const char *path) { int count = vfs_list_directory(path, entries, capacity); - // Trace string to see if we reached here - extern void serial_write(const char*); extern void serial_write_num(uint32_t); - serial_write("[EXPLORER] load_directory: "); serial_write(path); serial_write(" | loop start. count: "); serial_write_num(count); serial_write("\n"); - while (count == capacity) { - serial_write("[EXPLORER] Doubling capacity to: "); serial_write_num(capacity * 2); serial_write("\n"); capacity *= 2; vfs_dirent_t *new_entries = (vfs_dirent_t*)krealloc(entries, capacity * sizeof(vfs_dirent_t)); if (!new_entries) { kfree(entries); return; } @@ -723,8 +718,6 @@ static void explorer_load_directory(Window *win, const char *path) { count = vfs_list_directory(path, entries, capacity); } - serial_write("[EXPLORER] load_directory loop complete.\n"); - if (state->items_capacity < count) { int new_cap = count < EXPLORER_INITIAL_CAPACITY ? EXPLORER_INITIAL_CAPACITY : count; ExplorerItem *new_items = (ExplorerItem*)krealloc(state->items, new_cap * sizeof(ExplorerItem));