From a7c3cccce7e7f1078a1f3768a7abd5a354dd3fa4 Mon Sep 17 00:00:00 2001 From: boreddevnl Date: Tue, 17 Mar 2026 21:44:21 +0100 Subject: [PATCH] CheckP: smp support --- src/core/main.c | 23 ++++- src/fs/fat32.c | 122 ++++++++++++----------- src/mem/memory_manager.c | 16 +-- src/sys/gdt.c | 94 +++++++++++++++--- src/sys/gdt.h | 7 ++ src/sys/process.c | 28 +++--- src/sys/smp.c | 209 +++++++++++++++++++++++++++++++++++++++ src/sys/smp.h | 36 +++++++ src/sys/spinlock.h | 62 ++++++++++++ src/sys/work_queue.c | 62 ++++++++++++ src/sys/work_queue.h | 33 +++++++ 11 files changed, 593 insertions(+), 99 deletions(-) create mode 100644 src/sys/smp.c create mode 100644 src/sys/smp.h create mode 100644 src/sys/spinlock.h create mode 100644 src/sys/work_queue.c create mode 100644 src/sys/work_queue.h diff --git a/src/core/main.c b/src/core/main.c index 99798fb..36a260d 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -19,6 +19,8 @@ #include "memory_manager.h" #include "platform.h" #include "wallpaper.h" +#include "smp.h" +#include "work_queue.h" // --- Limine Requests --- __attribute__((used, section(".requests"))) @@ -42,11 +44,19 @@ static volatile struct limine_module_request module_request = { .revision = 0 }; +__attribute__((used, section(".requests"))) +static volatile struct limine_smp_request smp_request = { + .id = LIMINE_SMP_REQUEST, + .revision = 0, + .flags = 0 +}; + __attribute__((used, section(".requests_start"))) static volatile struct limine_request *const requests_start_marker[] = { (struct limine_request *)&framebuffer_request, (struct limine_request *)&memmap_request, (struct limine_request *)&module_request, + (struct limine_request *)&smp_request, NULL }; @@ -231,11 +241,22 @@ void kmain(void) { ps2_init(); asm("sti"); + // Initialize SMP — bring up all CPU cores + if (smp_request.response != NULL) { + uint32_t online = smp_init(smp_request.response); + serial_write("[DEBUG] SMP init complete, CPUs online: "); + serial_write_num(online); + 
serial_write("\n"); + } else { + serial_write("[DEBUG] No SMP response from bootloader\n"); + // Still init as single-CPU + smp_init(NULL); + } + wm_init(); asm volatile("sti"); - while (1) { wm_process_input(); wm_process_deferred_thumbs(); diff --git a/src/fs/fat32.c b/src/fs/fat32.c index 4c9db84..4d2ae85 100644 --- a/src/fs/fat32.c +++ b/src/fs/fat32.c @@ -8,6 +8,10 @@ #include #include #include "wm.h" +#include "spinlock.h" + +// Global lock for FAT32 operations (SMP safety) +static spinlock_t fat32_lock = SPINLOCK_INIT; #define MAX_FILES 256 @@ -1218,8 +1222,8 @@ char fat32_get_current_drive(void) { } FAT32_FileHandle* fat32_open(const char *path, const char *mode) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); const char *p = path; char drive = parse_drive_from_path(&p); @@ -1234,13 +1238,13 @@ FAT32_FileHandle* fat32_open(const char *path, const char *mode) { handle = realfs_open(drive, p, mode); } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return handle; } void fat32_close(FAT32_FileHandle *handle) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); if (handle && handle->valid) { if (handle->drive != 'A' && handle->mode != 0) { // Both read and write modes for real drives Disk *d = disk_get_by_letter(handle->drive); @@ -1265,14 +1269,14 @@ void fat32_close(FAT32_FileHandle *handle) { } handle->valid = false; } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); } int fat32_read(FAT32_FileHandle *handle, void *buffer, int size) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); if (!handle || !handle->valid || 
handle->mode != 0) { - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return -1; } @@ -1283,15 +1287,15 @@ int fat32_read(FAT32_FileHandle *handle, void *buffer, int size) { ret = realfs_read(handle, buffer, size); } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return ret; } int fat32_write(FAT32_FileHandle *handle, const void *buffer, int size) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); if (!handle || !handle->valid || (handle->mode != 1 && handle->mode != 2)) { - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return -1; } @@ -1302,15 +1306,15 @@ int fat32_write(FAT32_FileHandle *handle, const void *buffer, int size) { ret = realfs_write(handle, buffer, size); } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return ret; } int fat32_seek(FAT32_FileHandle *handle, int offset, int whence) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); if (!handle || !handle->valid) { - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return -1; } @@ -1349,7 +1353,7 @@ int fat32_seek(FAT32_FileHandle *handle, int offset, int whence) { } } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return new_position; } @@ -1487,13 +1491,13 @@ bool fat32_mkdir(const char *path) { const char *p = path; char drive = parse_drive_from_path(&p); - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); if (drive != 'A') { bool res = realfs_mkdir(drive, 
p); wm_notify_fs_change(); - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return res; } @@ -1501,18 +1505,18 @@ bool fat32_mkdir(const char *path) { fat32_normalize_path(p, normalized); if (ramfs_find_file(normalized)) { - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return false; } if (!check_desktop_limit(normalized)) { - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return false; } FileEntry *entry = ramfs_find_free_entry(); if (!entry) { - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return false; } @@ -1525,33 +1529,33 @@ bool fat32_mkdir(const char *path) { entry->attributes = ATTR_DIRECTORY; wm_notify_fs_change(); - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return true; } bool fat32_rmdir(const char *path) { if (parse_drive_from_path(&path) != 'A') return false; - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); char normalized[FAT32_MAX_PATH]; fat32_normalize_path(path, normalized); FileEntry *entry = ramfs_find_file(normalized); if (!entry || !(entry->attributes & ATTR_DIRECTORY)) { - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return false; } entry->used = false; wm_notify_fs_change(); - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return true; } bool fat32_delete(const char *path) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); const char *p = path; char drive = parse_drive_from_path(&p); @@ -1565,7 +1569,7 @@ bool fat32_delete(const char *path) { 
FileEntry *entry = ramfs_find_file(normalized); if (!entry || (entry->attributes & ATTR_DIRECTORY)) { - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return false; } @@ -1577,13 +1581,13 @@ bool fat32_delete(const char *path) { result = realfs_delete(drive, p); } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return result; } int fat32_get_info(const char *path, FAT32_FileInfo *info) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); const char *p = path; char drive = parse_drive_from_path(&p); @@ -1619,13 +1623,13 @@ int fat32_get_info(const char *path, FAT32_FileInfo *info) { } } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return result; } bool fat32_exists(const char *path) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); const char *p = path; char drive = parse_drive_from_path(&p); @@ -1644,7 +1648,7 @@ bool fat32_exists(const char *path) { } } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return exists; } @@ -1653,13 +1657,13 @@ bool fat32_rename(const char *old_path, const char *new_path) { if (parse_drive_from_path(&old_path) != 'A') return false; if (parse_drive_from_path(&new_path) != 'A') return false; - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); FileEntry *entry = ramfs_find_file(old_path); // Need to normalize inside find? 
yes ramfs_find calls normalize - if (!entry) { asm volatile("push %0; popfq" : : "r"(rflags)); return false; } + if (!entry) { spinlock_release_irqrestore(&fat32_lock, rflags); return false; } // Check destination - if (ramfs_find_file(new_path)) { asm volatile("push %0; popfq" : : "r"(rflags)); return false; } + if (ramfs_find_file(new_path)) { spinlock_release_irqrestore(&fat32_lock, rflags); return false; } size_t old_len = fs_strlen(old_path); // Logic from original rename... @@ -1689,13 +1693,13 @@ bool fat32_rename(const char *old_path, const char *new_path) { } } wm_notify_fs_change(); - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return true; } bool fat32_is_directory(const char *path) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); const char *p = path; char drive = parse_drive_from_path(&p); @@ -1718,13 +1722,13 @@ bool fat32_is_directory(const char *path) { } } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return is_dir; } int fat32_list_directory(const char *path, FAT32_FileInfo *entries, int max_entries) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); const char *p = path; char drive = parse_drive_from_path(&p); @@ -1749,13 +1753,13 @@ int fat32_list_directory(const char *path, FAT32_FileInfo *entries, int max_entr count = realfs_list_directory(drive, p, entries, max_entries); } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return count; } bool fat32_chdir(const char *path) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); const char *p = path; char 
drive = parse_drive_from_path(&p); @@ -1767,11 +1771,11 @@ bool fat32_chdir(const char *path) { current_dir[1] = 0; // If just switching drive (e.g. "B:"), return true if (p[0] == 0) { - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return true; } } else { - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return false; } } @@ -1787,17 +1791,17 @@ bool fat32_chdir(const char *path) { } } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return true; } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); return false; } void fat32_get_current_dir(char *buffer, int size) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + // SMP: Use FAT32 spinlock + uint64_t rflags = spinlock_acquire_irqsave(&fat32_lock); int len = 0; buffer[0] = current_drive; @@ -1811,5 +1815,5 @@ void fat32_get_current_dir(char *buffer, int size) { buffer[len + i] = current_dir[i]; } buffer[len + dir_len] = 0; - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&fat32_lock, rflags); } \ No newline at end of file diff --git a/src/mem/memory_manager.c b/src/mem/memory_manager.c index 9ff9031..da1577b 100644 --- a/src/mem/memory_manager.c +++ b/src/mem/memory_manager.c @@ -5,6 +5,7 @@ #include #include "limine.h" #include "platform.h" +#include "spinlock.h" // --- Internal State --- // memory_pool is no longer a single pointer, as we now manage multiple regions. 
@@ -16,6 +17,7 @@ static size_t total_allocated = 0; static size_t peak_allocated = 0; static uint32_t allocation_counter = 0; static bool initialized = false; +static spinlock_t mm_lock = SPINLOCK_INIT; extern void serial_write(const char *str); extern void serial_write_num(uint32_t n); @@ -174,8 +176,7 @@ static void remove_block_at(int idx) { void* kmalloc_aligned(size_t size, size_t alignment) { if (!initialized || size == 0) return NULL; - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + uint64_t rflags = spinlock_acquire_irqsave(&mm_lock); if (alignment == 0) alignment = 8; size = (size + 7) & ~7ULL; // Ensure size is multiple of 8 @@ -245,12 +246,12 @@ void* kmalloc_aligned(size_t size, size_t alignment) { if (total_allocated > peak_allocated) peak_allocated = total_allocated; mem_memset(ptr, 0, size); - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&mm_lock, rflags); return ptr; } } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&mm_lock, rflags); return NULL; } @@ -261,8 +262,7 @@ void* kmalloc(size_t size) { void kfree(void *ptr) { if (ptr == NULL || !initialized) return; - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + uint64_t rflags = spinlock_acquire_irqsave(&mm_lock); int block_idx = -1; for (int i = 0; i < block_count; i++) { @@ -273,7 +273,7 @@ void kfree(void *ptr) { } if (block_idx == -1) { - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&mm_lock, rflags); return; } @@ -301,7 +301,7 @@ void kfree(void *ptr) { } } - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&mm_lock, rflags); } void* krealloc(void *ptr, size_t new_size) { diff --git a/src/sys/gdt.c b/src/sys/gdt.c index 8ccd118..31ee735 100644 --- a/src/sys/gdt.c +++ b/src/sys/gdt.c @@ -14,11 +14,19 @@ static void *gdt_memset(void *s, int c, size_t n) { return s; } -#define GDT_ENTRIES 7 +// Base GDT: 5 segments + 1 
TSS (2 entries) = 7 entries for BSP. +// For SMP: we add 2 entries per additional CPU for their TSS. +// Max supported: 32 CPUs → 5 + 2*32 = 69 entries max. +#define GDT_BASE_ENTRIES 5 // NULL, KCode, KData, UData, UCode +#define GDT_MAX_ENTRIES 69 // 5 + 2*32 -struct gdt_entry gdt[GDT_ENTRIES]; +struct gdt_entry gdt[GDT_MAX_ENTRIES]; struct gdt_ptr gdtr; -struct tss_entry tss; +struct tss_entry tss; // BSP TSS (CPU 0) + +// Per-CPU TSS array (dynamically allocated for AP cores) +static struct tss_entry *ap_tss_array = NULL; +static uint32_t ap_tss_count = 0; extern void gdt_flush(uint64_t); extern void tss_flush(void); @@ -35,8 +43,8 @@ static void gdt_set_gate(int num, uint32_t base, uint32_t limit, uint8_t access, gdt[num].access = access; } -static void gdt_set_tss_gate(int num, uint64_t base, uint32_t limit, uint8_t access, uint8_t gran) { - // A TSS entry in x86_64 is 16 bytes (takes up 2 adjacent GDT entries) +// Write a 16-byte TSS descriptor into GDT entries [num] and [num+1] +static void gdt_set_tss_gate_at(int num, uint64_t base, uint32_t limit, uint8_t access, uint8_t gran) { struct { uint16_t limit_low; uint16_t base_low; @@ -65,44 +73,98 @@ void tss_set_stack(uint64_t kernel_stack) { tss.rsp0 = kernel_stack; } +void tss_set_stack_cpu(uint32_t cpu_id, uint64_t kernel_stack) { + if (cpu_id == 0) { + tss.rsp0 = kernel_stack; + } else if (ap_tss_array && cpu_id - 1 < ap_tss_count) { + ap_tss_array[cpu_id - 1].rsp0 = kernel_stack; + } +} + void gdt_init(void) { - gdtr.limit = (sizeof(struct gdt_entry) * GDT_ENTRIES) - 1; + // Start with 7 entries (5 segments + BSP TSS taking 2) + gdtr.limit = (sizeof(struct gdt_entry) * 7) - 1; gdtr.base = (uint64_t)&gdt; // NULL segment gdt_set_gate(0, 0, 0, 0, 0); // Kernel Code segment (Ring 0, 64-bit) - // 0x9A: Present(1), Ring(0), System(1), Executable(1), DirConforming(0), Readable(1), Accessed(0) - // 0xAF: Long Mode (64-bit) (L=1, DB=0) gdt_set_gate(1, 0, 0, 0x9A, 0xAF); // Kernel Data segment (Ring 0) - // 0x92: 
Present(1), Ring(0), System(1), Executable(0), DirExpandDown(0), Writable(1), Accessed(0) gdt_set_gate(2, 0, 0, 0x92, 0xAF); // User Data segment (Ring 3) - // 0xF2: Present(1), Ring(3), System(1), Executable(0), DirExpandDown(0), Writable(1), Accessed(0) gdt_set_gate(3, 0, 0, 0xF2, 0xAF); // User Code segment (Ring 3, 64-bit) - // 0xFA: Present(1), Ring(3), System(1), Executable(1), DirConforming(0), Readable(1), Accessed(0) - // 0xAF: Long Mode (64-bit) gdt_set_gate(4, 0, 0, 0xFA, 0xAF); - // TSS segment (takes entries 5 and 6 technically because it's 16 bytes) + // BSP TSS segment (entries 5 and 6) gdt_memset(&tss, 0, sizeof(struct tss_entry)); tss.iopb_offset = sizeof(struct tss_entry); - // Allocate a default Ring 0 interrupt stack in case an interrupt fires early or - // the scheduler hasn't set one up yet for a task. void* initial_tss_stack = kmalloc_aligned(4096, 4096); if (initial_tss_stack) { tss.rsp0 = (uint64_t)initial_tss_stack + 4096; } - gdt_set_tss_gate(5, (uint64_t)&tss, sizeof(struct tss_entry) - 1, 0x89, 0x00); + gdt_set_tss_gate_at(5, (uint64_t)&tss, sizeof(struct tss_entry) - 1, 0x89, 0x00); gdt_flush((uint64_t)&gdtr); tss_flush(); } + +// SMP: Add TSS entries for all AP cores and reload the GDT. 
+void gdt_init_ap_tss(uint32_t cpu_count) { + if (cpu_count <= 1) return; // No APs + + uint32_t ap_count = cpu_count - 1; + ap_tss_count = ap_count; + + // Allocate per-CPU TSS structures + ap_tss_array = (struct tss_entry *)kmalloc(ap_count * sizeof(struct tss_entry)); + if (!ap_tss_array) return; + gdt_memset(ap_tss_array, 0, ap_count * sizeof(struct tss_entry)); + + // Each AP TSS goes at GDT slot 7 + (i*2) (since slot 5-6 is BSP TSS) + for (uint32_t i = 0; i < ap_count; i++) { + int gdt_slot = 7 + (i * 2); + if (gdt_slot + 1 >= GDT_MAX_ENTRIES) break; + + ap_tss_array[i].iopb_offset = sizeof(struct tss_entry); + + // Allocate a kernel stack for this AP's interrupt handling + void *ap_int_stack = kmalloc_aligned(8192, 4096); + if (ap_int_stack) { + ap_tss_array[i].rsp0 = (uint64_t)ap_int_stack + 8192; + } + + gdt_set_tss_gate_at(gdt_slot, (uint64_t)&ap_tss_array[i], + sizeof(struct tss_entry) - 1, 0x89, 0x00); + } + + // Update GDT limit to include all new entries + uint32_t total_entries = 7 + (ap_count * 2); + if (total_entries > GDT_MAX_ENTRIES) total_entries = GDT_MAX_ENTRIES; + gdtr.limit = (sizeof(struct gdt_entry) * total_entries) - 1; + + // Reload GDTR on BSP with the expanded limit. + // We must NOT call tss_flush() here — the BSP TSS is already loaded + // and marked "busy" (0x8B). Trying to LTR a busy TSS causes GPF. + asm volatile("lgdt %0" : : "m"(gdtr)); +} + +// SMP: Load the TSS for a specific AP. Called from ap_entry(). 
+void gdt_load_ap_tss(uint32_t cpu_id) { + if (cpu_id == 0) { + // BSP uses slot 5 → selector 0x28 + asm volatile("mov $0x28, %%ax; ltr %%ax" ::: "ax"); + return; + } + + // AP cpu_id maps to GDT slot 7 + ((cpu_id-1) * 2) + uint16_t selector = (uint16_t)((7 + ((cpu_id - 1) * 2)) * sizeof(struct gdt_entry)); + asm volatile("ltr %0" : : "r"(selector)); +} diff --git a/src/sys/gdt.h b/src/sys/gdt.h index 99836b0..0ce7913 100644 --- a/src/sys/gdt.h +++ b/src/sys/gdt.h @@ -48,4 +48,11 @@ struct gdt_ptr { void gdt_init(void); void tss_set_stack(uint64_t kernel_stack); +// SMP: Initialize per-CPU TSS entries. Call after smp detects cpu_count. +void gdt_init_ap_tss(uint32_t cpu_count); +// SMP: Load the TSS for a specific CPU (called from AP entry). +void gdt_load_ap_tss(uint32_t cpu_id); +// SMP: Set kernel stack for a specific CPU's TSS. +void tss_set_stack_cpu(uint32_t cpu_id, uint64_t kernel_stack); + #endif diff --git a/src/sys/process.c b/src/sys/process.c index f5ae52c..aab6b0e 100644 --- a/src/sys/process.c +++ b/src/sys/process.c @@ -10,6 +10,7 @@ #include "memory_manager.h" #include "elf.h" #include "wm.h" +#include "spinlock.h" extern void cmd_write(const char *str); extern void serial_write(const char *str); @@ -20,6 +21,7 @@ int process_count = 0; static process_t* current_process = NULL; static uint32_t next_pid = 0; static void *free_kernel_stack_later = NULL; +static spinlock_t runqueue_lock = SPINLOCK_INIT; void process_init(void) { for (int i = 0; i < MAX_PROCESSES; i++) { @@ -132,11 +134,10 @@ void process_create(void* entry_point, bool is_user) { new_proc->fpu_initialized = true; // Add to linked list (Critical Section) - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + uint64_t rflags = spinlock_acquire_irqsave(&runqueue_lock); new_proc->next = current_process->next; current_process->next = new_proc; - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&runqueue_lock, rflags); } process_t* 
process_create_elf(const char* filepath, const char* args_str) { @@ -316,11 +317,10 @@ process_t* process_create_elf(const char* filepath, const char* args_str) { new_proc->fpu_initialized = true; // Add to linked list (Critical Section) - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + uint64_t rflags = spinlock_acquire_irqsave(&runqueue_lock); new_proc->next = current_process->next; current_process->next = new_proc; - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&runqueue_lock, rflags); serial_write("[PROCESS] Spawned ELF Executable: "); serial_write(filepath); @@ -411,8 +411,7 @@ static void process_cleanup_inner(process_t *proc) { void process_terminate(process_t *to_delete) { if (!to_delete || to_delete->pid == 0xFFFFFFFF || to_delete->pid == 0) return; - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + uint64_t rflags = spinlock_acquire_irqsave(&runqueue_lock); process_cleanup_inner(to_delete); @@ -424,7 +423,7 @@ void process_terminate(process_t *to_delete) { if (prev == to_delete) { // Only one process (should be kernel), cannot terminate. 
- asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&runqueue_lock, rflags); return; } @@ -459,15 +458,14 @@ void process_terminate(process_t *to_delete) { to_delete->kernel_stack_alloc = NULL; to_delete->pml4_phys = 0; - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&runqueue_lock, rflags); } uint64_t process_terminate_current(void) { - uint64_t rflags; - asm volatile("pushfq; pop %0; cli" : "=r"(rflags)); + uint64_t rflags = spinlock_acquire_irqsave(&runqueue_lock); if (!current_process || current_process->pid == 0) { - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&runqueue_lock, rflags); return 0; } @@ -484,7 +482,7 @@ uint64_t process_terminate_current(void) { if (prev == current_process) { // Only one process (should be kernel), cannot terminate. - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&runqueue_lock, rflags); return to_delete->rsp; } @@ -520,7 +518,7 @@ uint64_t process_terminate_current(void) { to_delete->pml4_phys = 0; uint64_t next_rsp = current_process->rsp; - asm volatile("push %0; popfq" : : "r"(rflags)); + spinlock_release_irqrestore(&runqueue_lock, rflags); return next_rsp; } diff --git a/src/sys/smp.c b/src/sys/smp.c new file mode 100644 index 0000000..d9958e5 --- /dev/null +++ b/src/sys/smp.c @@ -0,0 +1,209 @@ +// Copyright (c) 2023-2026 Chris (boreddevnl) +// This software is released under the GNU General Public License v3.0. See LICENSE file for details. +// This header needs to maintain in any file it is present in, as per the GPL license terms. 
+#include "smp.h" +#include "limine.h" +#include "memory_manager.h" +#include "gdt.h" +#include "idt.h" +#include "platform.h" +#include "paging.h" + +extern void serial_write(const char *str); +extern void serial_write_num(uint32_t n); +extern void serial_write_hex(uint64_t n); + +// --- Dynamically allocated per-CPU state --- +static cpu_state_t *cpu_states = NULL; // Array[cpu_count] +static uint32_t total_cpus = 0; +static uint32_t bsp_lapic_id = 0; + +// Get LAPIC ID via CPUID leaf 0x01 (works on all x86_64) +static uint32_t read_lapic_id(void) { + uint32_t eax, ebx, ecx, edx; + asm volatile("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(1)); + return (ebx >> 24) & 0xFF; +} + +uint32_t smp_this_cpu_id(void) { + if (total_cpus <= 1) return 0; + uint32_t lapic = read_lapic_id(); + for (uint32_t i = 0; i < total_cpus; i++) { + if (cpu_states[i].lapic_id == lapic) return i; + } + return 0; // Fallback to BSP +} + +uint32_t smp_cpu_count(void) { + return total_cpus; +} + +cpu_state_t *smp_get_cpu(uint32_t cpu_id) { + if (cpu_id >= total_cpus) return NULL; + return &cpu_states[cpu_id]; +} + +// --- AP Entry Point --- +// Called by Limine on each Application Processor. +// The limine_smp_info* is passed as a parameter. +static void ap_entry(struct limine_smp_info *info) { + // 1. Figure out which CPU we are + uint32_t my_id = (uint32_t)(info->extra_argument); + + // 2. Enable FPU/SSE on this core (same as BSP does in platform_init) + uint64_t cr0; + asm volatile("mov %%cr0, %0" : "=r"(cr0)); + cr0 &= ~(1ULL << 2); // Clear EM + cr0 |= (1ULL << 1); // Set MP + cr0 |= (1ULL << 5); // Set NE + asm volatile("mov %0, %%cr0" : : "r"(cr0)); + + uint64_t cr4; + asm volatile("mov %%cr4, %0" : "=r"(cr4)); + cr4 |= (1ULL << 9); // OSFXSR + cr4 |= (1ULL << 10); // OSXMMEXCPT + asm volatile("mov %0, %%cr4" : : "r"(cr4)); + asm volatile("fninit"); + + // 3. 
Load the shared GDT (same one the BSP uses — all CPUs share kernel mappings) + extern struct gdt_ptr gdtr; + asm volatile("lgdt %0" : : "m"(gdtr)); + + // 4. Load per-CPU TSS + gdt_load_ap_tss(my_id); + + // 5. Load the shared IDT + extern void idt_load(void); + idt_load(); + + // 6. Load the kernel page tables (same CR3 as BSP — shared kernel space) + uint64_t kernel_cr3 = paging_get_pml4_phys(); + asm volatile("mov %0, %%cr3" : : "r"(kernel_cr3)); + + // 7. Mark ourselves as online + cpu_states[my_id].online = true; + + serial_write("[SMP] AP "); + serial_write_num(my_id); + serial_write(" online (LAPIC "); + serial_write_num(cpu_states[my_id].lapic_id); + serial_write(")\n"); + + // 8. Enable interrupts and enter idle loop + // APs don't handle PIC interrupts (only BSP does with legacy PIC). + // But they WILL pick up work from the work queue. + asm volatile("sti"); + + // Import work queue drain function + extern void work_queue_drain_loop(void); + work_queue_drain_loop(); + + // Should never reach here + for (;;) { asm volatile("hlt"); } +} + +// --- SMP Initialization --- +uint32_t smp_init(struct limine_smp_response *smp_resp) { + if (!smp_resp || smp_resp->cpu_count <= 1) { + // Single CPU system — just set up the BSP entry + total_cpus = 1; + cpu_states = (cpu_state_t *)kmalloc(sizeof(cpu_state_t)); + if (!cpu_states) return 1; + extern void mem_memset(void *, int, size_t); + mem_memset(cpu_states, 0, sizeof(cpu_state_t)); + cpu_states[0].cpu_id = 0; + cpu_states[0].lapic_id = read_lapic_id(); + cpu_states[0].online = true; + serial_write("[SMP] Single CPU mode\n"); + return 1; + } + + total_cpus = (uint32_t)smp_resp->cpu_count; + bsp_lapic_id = smp_resp->bsp_lapic_id; + + serial_write("[SMP] Detected "); + serial_write_num(total_cpus); + serial_write(" CPUs. 
BSP LAPIC ID: "); + serial_write_num(bsp_lapic_id); + serial_write("\n"); + + // Allocate per-CPU state array + cpu_states = (cpu_state_t *)kmalloc(total_cpus * sizeof(cpu_state_t)); + if (!cpu_states) { + serial_write("[SMP] ERROR: Failed to allocate CPU state array!\n"); + total_cpus = 1; + return 1; + } + extern void mem_memset(void *, int, size_t); + mem_memset(cpu_states, 0, total_cpus * sizeof(cpu_state_t)); + + // Initialize per-CPU GDT/TSS entries for all CPUs + gdt_init_ap_tss(total_cpus); + + // Fill in CPU state and start APs + uint32_t bsp_index = 0; + for (uint32_t i = 0; i < total_cpus; i++) { + struct limine_smp_info *cpu = smp_resp->cpus[i]; + cpu_states[i].cpu_id = i; + cpu_states[i].lapic_id = cpu->lapic_id; + + if (cpu->lapic_id == bsp_lapic_id) { + // This is the BSP — already running + cpu_states[i].online = true; + bsp_index = i; + serial_write("[SMP] BSP CPU "); + serial_write_num(i); + serial_write(" (LAPIC "); + serial_write_num(cpu->lapic_id); + serial_write(") online\n"); + } else { + // Allocate a kernel stack for this AP + void *ap_stack = kmalloc_aligned(65536, 65536); + if (!ap_stack) { + serial_write("[SMP] ERROR: Failed to allocate AP stack!\n"); + continue; + } + cpu_states[i].kernel_stack = (uint64_t)ap_stack + 65536; + cpu_states[i].kernel_stack_alloc = ap_stack; + cpu_states[i].online = false; + + // Set extra_argument so the AP knows its index + cpu->extra_argument = i; + + // Tell Limine to start this AP. Limine sets up the AP's stack + // from extra_argument's stack, but we need the goto_address. + // Limine will jump to ap_entry with the AP's limine_smp_info*. + // Important: Limine creates a temporary stack for the AP, and the + // goto_address is where the AP starts executing. 
+ + serial_write("[SMP] Starting AP "); + serial_write_num(i); + serial_write(" (LAPIC "); + serial_write_num(cpu->lapic_id); + serial_write(")...\n"); + + // This atomic write triggers the AP to start executing at ap_entry + __atomic_store_n(&cpu->goto_address, ap_entry, __ATOMIC_SEQ_CST); + } + } + + // Wait for all APs to come online (with timeout) + volatile uint32_t timeout = 10000000; + uint32_t online_count = 0; + while (timeout-- > 0) { + online_count = 0; + for (uint32_t i = 0; i < total_cpus; i++) { + if (cpu_states[i].online) online_count++; + } + if (online_count == total_cpus) break; + asm volatile("pause"); + } + + serial_write("[SMP] All "); + serial_write_num(online_count); + serial_write(" of "); + serial_write_num(total_cpus); + serial_write(" CPUs online\n"); + + return online_count; +} diff --git a/src/sys/smp.h b/src/sys/smp.h new file mode 100644 index 0000000..62b8b4d --- /dev/null +++ b/src/sys/smp.h @@ -0,0 +1,36 @@ +// Copyright (c) 2023-2026 Chris (boreddevnl) +// This software is released under the GNU General Public License v3.0. See LICENSE file for details. +// This header needs to maintain in any file it is present in, as per the GPL license terms. +#ifndef SMP_H +#define SMP_H + +#include +#include +#include "spinlock.h" + +// Per-CPU state. Dynamically allocated at boot based on actual CPU count. +typedef struct cpu_state { + uint32_t cpu_id; // Logical CPU index (0 = BSP) + uint32_t lapic_id; // Local APIC ID from Limine + uint64_t kernel_stack; // Top of kernel stack for this CPU + void *kernel_stack_alloc; // Base allocation for kfree + volatile bool online; // True once AP is fully initialized +} cpu_state_t; + +// Initialize SMP — call after GDT/IDT/memory init but before wm_init. +// Pass the Limine SMP response. APs will be started and will enter their +// idle loops. Returns the number of CPUs brought online. 
+struct limine_smp_response;
+uint32_t smp_init(struct limine_smp_response *smp_resp);
+
+// Get the current CPU index (0 = BSP). Uses CPUID to read LAPIC ID,
+// then looks up in the cpu table.
+uint32_t smp_this_cpu_id(void);
+
+// Total number of CPUs online.
+uint32_t smp_cpu_count(void);
+
+// Get per-CPU state by index.
+cpu_state_t *smp_get_cpu(uint32_t cpu_id);
+
+#endif
diff --git a/src/sys/spinlock.h b/src/sys/spinlock.h
new file mode 100644
index 0000000..8ba4c28
--- /dev/null
+++ b/src/sys/spinlock.h
@@ -0,0 +1,62 @@
+// Copyright (c) 2023-2026 Chris (boreddevnl)
+// This software is released under the GNU General Public License v3.0. See LICENSE file for details.
+// This header needs to maintain in any file it is present in, as per the GPL license terms.
+#ifndef SPINLOCK_H
+#define SPINLOCK_H
+
+#include <stdint.h>
+
+// Simple test-and-set spinlock for x86_64 SMP.
+// Uses 'lock xchg' for acquire and a plain store for release.
+// Includes 'pause' to reduce bus contention while spinning.
+
+typedef volatile uint32_t spinlock_t;
+
+#define SPINLOCK_INIT 0
+
+static inline void spinlock_acquire(spinlock_t *lock) {
+    while (1) {
+        // Try to set the lock from 0 -> 1
+        uint32_t prev;
+        asm volatile("lock xchgl %0, %1"
+                     : "=r"(prev), "+m"(*lock)
+                     : "0"((uint32_t)1)
+                     : "memory");
+        if (prev == 0) return; // We got the lock
+        // Spin with pause (reduces power and bus traffic)
+        while (*lock) {
+            asm volatile("pause" ::: "memory");
+        }
+    }
+}
+
+static inline void spinlock_release(spinlock_t *lock) {
+    asm volatile("" ::: "memory"); // compiler barrier
+    *lock = 0;
+}
+
+// Try to acquire without blocking. Returns 1 if acquired, 0 if not.
+static inline int spinlock_try(spinlock_t *lock) {
+    uint32_t prev;
+    asm volatile("lock xchgl %0, %1"
+                 : "=r"(prev), "+m"(*lock)
+                 : "0"((uint32_t)1)
+                 : "memory");
+    return (prev == 0);
+}
+
+// IRQ-safe spinlock: saves flags, disables interrupts, then acquires.
+// Use when the lock may be contended from interrupt context.
+static inline uint64_t spinlock_acquire_irqsave(spinlock_t *lock) {
+    uint64_t flags;
+    asm volatile("pushfq; pop %0; cli" : "=r"(flags) :: "memory");
+    spinlock_acquire(lock);
+    return flags;
+}
+
+static inline void spinlock_release_irqrestore(spinlock_t *lock, uint64_t flags) {
+    spinlock_release(lock);
+    asm volatile("push %0; popfq" : : "r"(flags) : "memory");
+}
+
+#endif
diff --git a/src/sys/work_queue.c b/src/sys/work_queue.c
new file mode 100644
index 0000000..22e6b55
--- /dev/null
+++ b/src/sys/work_queue.c
@@ -0,0 +1,62 @@
+// Copyright (c) 2023-2026 Chris (boreddevnl)
+// This software is released under the GNU General Public License v3.0. See LICENSE file for details.
+// This header needs to maintain in any file it is present in, as per the GPL license terms.
+#include "work_queue.h"
+#include "spinlock.h"
+
+extern void serial_write(const char *str);
+
+#define WORK_QUEUE_SIZE 64
+
+static work_item_t work_queue[WORK_QUEUE_SIZE];
+static volatile int wq_head = 0;
+static volatile int wq_tail = 0;
+static spinlock_t wq_lock = SPINLOCK_INIT;
+
+void work_queue_submit(work_fn_t fn, void *arg) {
+    if (!fn) return;
+
+    uint64_t flags = spinlock_acquire_irqsave(&wq_lock);
+    int next_tail = (wq_tail + 1) % WORK_QUEUE_SIZE;
+    if (next_tail == wq_head) {
+        // Queue full — drop the work item
+        spinlock_release_irqrestore(&wq_lock, flags);
+        return;
+    }
+    work_queue[wq_tail].fn = fn;
+    work_queue[wq_tail].arg = arg;
+    wq_tail = next_tail;
+    spinlock_release_irqrestore(&wq_lock, flags);
+}
+
+bool work_queue_drain_one(void) {
+    uint64_t flags = spinlock_acquire_irqsave(&wq_lock);
+    if (wq_head == wq_tail) {
+        spinlock_release_irqrestore(&wq_lock, flags);
+        return false;
+    }
+    work_item_t item = work_queue[wq_head];
+    wq_head = (wq_head + 1) % WORK_QUEUE_SIZE;
+    spinlock_release_irqrestore(&wq_lock, flags);
+
+    // Execute outside the lock
+    if (item.fn) {
+        item.fn(item.arg);
+    }
+    return true;
+}
+
+void work_queue_drain_loop(void) {
+    while (1) {
+        // Try to drain all pending work
+        while (work_queue_drain_one()) {
+            // Keep draining
+        }
+
+        // No work — halt the CPU until an interrupt wakes us.
+        // With legacy PIC, APs don't receive timer interrupts, so they'll
+        // sleep until an IPI is sent (e.g., when work is submitted).
+        // This is ideal: APs use 0% CPU when idle.
+        asm volatile("sti; hlt; cli");
+    }
+}
diff --git a/src/sys/work_queue.h b/src/sys/work_queue.h
new file mode 100644
index 0000000..75ec5c0
--- /dev/null
+++ b/src/sys/work_queue.h
@@ -0,0 +1,33 @@
+// Copyright (c) 2023-2026 Chris (boreddevnl)
+// This software is released under the GNU General Public License v3.0. See LICENSE file for details.
+// This header needs to maintain in any file it is present in, as per the GPL license terms.
+#ifndef WORK_QUEUE_H
+#define WORK_QUEUE_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include "spinlock.h"
+
+// A simple work queue for offloading tasks to idle AP cores.
+// Producer (BSP or any core) calls work_queue_submit().
+// Consumer (AP idle loops) calls work_queue_drain_loop().
+
+typedef void (*work_fn_t)(void *arg);
+
+typedef struct {
+    work_fn_t fn;
+    void *arg;
+} work_item_t;
+
+// Submit a work item. Thread-safe (uses spinlock internally).
+void work_queue_submit(work_fn_t fn, void *arg);
+
+// Drain and execute all pending work items, then hlt until more arrive.
+// Called from AP idle loops. Never returns.
+void work_queue_drain_loop(void);
+
+// Drain one item (if available). Returns true if work was done.
+// Useful for BSP to optionally help if idle.
+bool work_queue_drain_one(void);
+
+#endif