From 4e8ea5acd2ec5ae285156045c10286442ae6cafc Mon Sep 17 00:00:00 2001 From: boreddevnl Date: Sun, 22 Mar 2026 21:04:50 +0100 Subject: [PATCH] perf: fix core starvation --- docs/architecture/window_manager.md | 8 ++--- src/sys/syscall.c | 52 +++++++++++++++++++++-------- src/wm/graphics.c | 48 ++++++++++++++++---------- src/wm/wm.h | 2 ++ 4 files changed, 74 insertions(+), 36 deletions(-) diff --git a/docs/architecture/window_manager.md b/docs/architecture/window_manager.md index eb737ed..808ed91 100644 --- a/docs/architecture/window_manager.md +++ b/docs/architecture/window_manager.md @@ -38,11 +38,11 @@ The WM acts as the central hub for input routing. With the introduction of Symmetric Multi-Processing (SMP), the Window Manager (WM) was redesigned to ensure stability and high performance across multiple cores. -1. **Global GUI Lock (`wm_lock`)**: To prevent race conditions when multiple cores attempt to create windows, move cursors, or update pixels, the WM utilizes a central spinlock. All `GUI_CMD` system calls are protected by this lock. -2. **Deferred Rendering**: Previously, the desktop was repainted inside the timer interrupt. On multi-core systems, this caused severe "core starvation" as all other CPUs would spin waiting for the GUI lock during the long draw cycle. -3. **Kernel Loop Integration**: Final screen composition (`wm_paint`) is now deferred to the main kernel idle loop on the Bootstrap Processor (BSP). This allows application cores to continue processing logic while the GUI asynchronously flips the framebuffer. +1. **Granular Window Locks**: Each `Window` object possesses its own `spinlock_t lock;`. User applications concurrently draw directly into their own window buffers without stalling the rest of the system. The global `wm_lock` is reserved strictly for altering global structures like window z-order or syncing buffers to the screen compositing layer. +2. 
**Per-CPU Rendering State**: To facilitate simultaneous GUI system calls across all CPU cores, the low-level rendering context (`g_render_target` array) is isolated per-CPU using the core ID. This lets each core rasterize without sharing render-target state: a draw call holds only the target window's lock (or `wm_lock` when the window has no backbuffer), drastically reducing rendering bottlenecks. +3. **Deferred Compositing**: Final screen composition (`wm_paint`) is scheduled to the main kernel idle loop on the Bootstrap Processor (BSP). This enables application cores to continue processing logic seamlessly while the GUI asynchronously handles flipping the physical framebuffer. > [!IMPORTANT] -> Because rendering is now asynchronous to the timer, application performance is significantly higher as they are no longer bottlenecked by interrupt-context drawing. +> Because application rendering (rasterizing geometry into a window's backbuffer) takes only that window's lock instead of the global `wm_lock`, applications drawing into different windows no longer contend with each other. Committing a frame (`GUI_CMD_MARK_DIRTY`) still takes `wm_lock`. --- diff --git a/src/sys/syscall.c b/src/sys/syscall.c index edd8331..fe200ad 100644 --- a/src/sys/syscall.c +++ b/src/sys/syscall.c @@ -215,6 +215,7 @@ static uint64_t syscall_handler_inner(registers_t *regs) { win->cursor_pos = 0; win->data = proc; win->font = NULL; + win->lock = SPINLOCK_INIT; serial_write("Kernel: Dims initialized.\n"); @@ -269,7 +270,9 @@ static uint64_t syscall_handler_inner(registers_t *regs) { extern void graphics_set_render_target(uint32_t *buffer, int w, int h); uint64_t rflags; - rflags = wm_lock_acquire(); + bool use_wm_lock = (win->pixels == NULL); + if (use_wm_lock) rflags = wm_lock_acquire(); + else rflags = spinlock_acquire_irqsave(&win->lock); if (win->pixels) { // Strict user-to-window relative clamping @@ -289,7 +292,8 @@ static uint64_t syscall_handler_inner(registers_t *regs) { draw_rect(win->x + params[0], win->y + params[1], params[2], params[3], color); } - wm_lock_release(rflags); + if (use_wm_lock) wm_lock_release(rflags); + else spinlock_release_irqrestore(&win->lock, rflags); } }
else if (cmd == GUI_CMD_DRAW_ROUNDED_RECT_FILLED) { Window *win = (Window *)arg2; @@ -303,7 +307,9 @@ static uint64_t syscall_handler_inner(registers_t *regs) { extern void graphics_set_render_target(uint32_t *buffer, int w, int h); uint64_t rflags; - rflags = wm_lock_acquire(); + bool use_wm_lock = (win->pixels == NULL); + if (use_wm_lock) rflags = wm_lock_acquire(); + else rflags = spinlock_acquire_irqsave(&win->lock); if (win->pixels) { int rx = (int)params[0]; int ry = (int)params[1]; @@ -321,7 +327,8 @@ static uint64_t syscall_handler_inner(registers_t *regs) { } } - wm_lock_release(rflags); + if (use_wm_lock) wm_lock_release(rflags); + else spinlock_release_irqrestore(&win->lock, rflags); } } else if (cmd == GUI_CMD_DRAW_STRING) { Window *win = (Window *)arg2; @@ -344,7 +351,9 @@ static uint64_t syscall_handler_inner(registers_t *regs) { kernel_str[i] = 0; uint64_t rflags; - rflags = wm_lock_acquire(); + bool use_wm_lock = (win->pixels == NULL); + if (use_wm_lock) rflags = wm_lock_acquire(); + else rflags = spinlock_acquire_irqsave(&win->lock); ttf_font_t *font = win->font ? 
(ttf_font_t*)win->font : graphics_get_current_ttf(); @@ -380,7 +389,8 @@ static uint64_t syscall_handler_inner(registers_t *regs) { } } - wm_lock_release(rflags); + if (use_wm_lock) wm_lock_release(rflags); + else spinlock_release_irqrestore(&win->lock, rflags); } } else if (cmd == 10) { // GUI_CMD_DRAW_STRING_BITMAP Window *win = (Window *)arg2; @@ -403,7 +413,9 @@ static uint64_t syscall_handler_inner(registers_t *regs) { kernel_str[i] = 0; uint64_t rflags; - rflags = wm_lock_acquire(); + bool use_wm_lock = (win->pixels == NULL); + if (use_wm_lock) rflags = wm_lock_acquire(); + else rflags = spinlock_acquire_irqsave(&win->lock); if (win->pixels) { if (ux >= -100 && ux < win->w && uy >= -100 && uy < (win->h - 20)) { @@ -415,7 +427,8 @@ static uint64_t syscall_handler_inner(registers_t *regs) { draw_string_bitmap(win->x + ux, win->y + uy, kernel_str, color); } - wm_lock_release(rflags); + if (use_wm_lock) wm_lock_release(rflags); + else spinlock_release_irqrestore(&win->lock, rflags); } } else if (cmd == 11) { // GUI_CMD_DRAW_STRING_SCALED Window *win = (Window *)arg2; @@ -442,7 +455,9 @@ static uint64_t syscall_handler_inner(registers_t *regs) { kernel_str[i] = 0; uint64_t rflags; - rflags = wm_lock_acquire(); + bool use_wm_lock = (win->pixels == NULL); + if (use_wm_lock) rflags = wm_lock_acquire(); + else rflags = spinlock_acquire_irqsave(&win->lock); ttf_font_t *font = win->font ? 
(ttf_font_t*)win->font : graphics_get_current_ttf(); @@ -478,7 +493,8 @@ static uint64_t syscall_handler_inner(registers_t *regs) { } } - wm_lock_release(rflags); + if (use_wm_lock) wm_lock_release(rflags); + else spinlock_release_irqrestore(&win->lock, rflags); } } else if (cmd == 18) { // GUI_CMD_DRAW_STRING_SCALED_SLOPED Window *win = (Window *)arg2; @@ -515,7 +531,9 @@ static uint64_t syscall_handler_inner(registers_t *regs) { kernel_str[i] = 0; uint64_t rflags; - rflags = wm_lock_acquire(); + bool use_wm_lock = (win->pixels == NULL); + if (use_wm_lock) rflags = wm_lock_acquire(); + else rflags = spinlock_acquire_irqsave(&win->lock); ttf_font_t *font = win->font ? (ttf_font_t*)win->font : graphics_get_current_ttf(); @@ -553,7 +571,8 @@ static uint64_t syscall_handler_inner(registers_t *regs) { } } - wm_lock_release(rflags); + if (use_wm_lock) wm_lock_release(rflags); + else spinlock_release_irqrestore(&win->lock, rflags); } } else if (cmd == GUI_CMD_DRAW_IMAGE) { Window *win = (Window *)arg2; @@ -564,7 +583,9 @@ static uint64_t syscall_handler_inner(registers_t *regs) { for (int i = 0; i < 4; i++) params[i] = u_params[i]; uint64_t rflags; - rflags = wm_lock_acquire(); + bool use_wm_lock = (win->pixels == NULL); + if (use_wm_lock) rflags = wm_lock_acquire(); + else rflags = spinlock_acquire_irqsave(&win->lock); if (win->pixels) { int rx = (int)params[0]; int ry = (int)params[1]; @@ -599,7 +620,8 @@ static uint64_t syscall_handler_inner(registers_t *regs) { } } - wm_lock_release(rflags); + if (use_wm_lock) wm_lock_release(rflags); + else spinlock_release_irqrestore(&win->lock, rflags); } } else if (cmd == GUI_CMD_MARK_DIRTY) { uint64_t rflags = wm_lock_acquire(); @@ -611,8 +633,10 @@ static uint64_t syscall_handler_inner(registers_t *regs) { // Dual-buffer commit: copy pixels to comp_pixels if (win->pixels && win->comp_pixels) { + uint64_t win_rflags = spinlock_acquire_irqsave(&win->lock); extern void mem_memcpy(void *dest, const void *src, size_t len); 
mem_memcpy(win->comp_pixels, win->pixels, (size_t)win->w * (win->h - 20) * 4); + spinlock_release_irqrestore(&win->lock, win_rflags); } wm_mark_dirty(win->x + (int)params[0], win->y + (int)params[1], (int)params[2], (int)params[3]); } diff --git a/src/wm/graphics.c b/src/wm/graphics.c index 80d191b..8a2ed9e 100644 --- a/src/wm/graphics.c +++ b/src/wm/graphics.c @@ -34,9 +34,11 @@ static uint32_t g_back_buffer[MAX_FB_WIDTH * MAX_FB_HEIGHT] __attribute__((align static int g_clip_x = 0, g_clip_y = 0, g_clip_w = 0, g_clip_h = 0; static bool g_clip_enabled = false; -static uint32_t *g_render_target = NULL; -static int g_rt_width = 0; -static int g_rt_height = 0; +extern uint32_t smp_this_cpu_id(void); +#define MAX_RENDER_CPUS 32 +static uint32_t *g_render_target[MAX_RENDER_CPUS] = {0}; +static int g_rt_width[MAX_RENDER_CPUS] = {0}; +static int g_rt_height[MAX_RENDER_CPUS] = {0}; static ttf_font_t *g_current_ttf = NULL; @@ -176,15 +178,19 @@ void graphics_clear_dirty_no_lock(void) { } void graphics_set_render_target(uint32_t *buffer, int w, int h) { - g_render_target = buffer; - g_rt_width = w; - g_rt_height = h; + uint32_t cpu = smp_this_cpu_id(); + if (cpu < MAX_RENDER_CPUS) { + g_render_target[cpu] = buffer; + g_rt_width[cpu] = w; + g_rt_height[cpu] = h; + } } void put_pixel(int x, int y, uint32_t color) { - if (g_render_target) { - if (x >= 0 && x < g_rt_width && y >= 0 && y < g_rt_height) { - g_render_target[y * g_rt_width + x] = color; + uint32_t cpu = smp_this_cpu_id(); + if (cpu < MAX_RENDER_CPUS && g_render_target[cpu]) { + if (x >= 0 && x < g_rt_width[cpu] && y >= 0 && y < g_rt_height[cpu]) { + g_render_target[cpu][y * g_rt_width[cpu] + x] = color; } return; } @@ -204,9 +210,10 @@ void put_pixel(int x, int y, uint32_t color) { } uint32_t graphics_get_pixel(int x, int y) { - if (g_render_target) { - if (x >= 0 && x < g_rt_width && y >= 0 && y < g_rt_height) { - return g_render_target[y * g_rt_width + x]; + uint32_t cpu = smp_this_cpu_id(); + if (cpu < 
MAX_RENDER_CPUS && g_render_target[cpu]) { + if (x >= 0 && x < g_rt_width[cpu] && y >= 0 && y < g_rt_height[cpu]) { + return g_render_target[cpu][y * g_rt_width[cpu] + x]; } return 0; } @@ -220,15 +227,16 @@ uint32_t graphics_get_pixel(int x, int y) { void draw_rect(int x, int y, int w, int h, uint32_t color) { int x1 = x, y1 = y, x2 = x + w, y2 = y + h; - if (g_render_target) { + uint32_t cpu = smp_this_cpu_id(); + if (cpu < MAX_RENDER_CPUS && g_render_target[cpu]) { if (x1 < 0) x1 = 0; if (y1 < 0) y1 = 0; - if (x2 > g_rt_width) x2 = g_rt_width; - if (y2 > g_rt_height) y2 = g_rt_height; + if (x2 > g_rt_width[cpu]) x2 = g_rt_width[cpu]; + if (y2 > g_rt_height[cpu]) y2 = g_rt_height[cpu]; if (x1 >= x2 || y1 >= y2) return; for (int i = y1; i < y2; i++) { - uint32_t *row = &g_render_target[i * g_rt_width + x1]; + uint32_t *row = &g_render_target[cpu][i * g_rt_width[cpu] + x1]; int len = x2 - x1; for (int j = 0; j < len; j++) { row[j] = color; @@ -461,7 +469,9 @@ void draw_char(int x, int y, char c, uint32_t color) { unsigned char uc = (unsigned char)c; if (uc > 127) return; - if (g_clip_enabled && !g_render_target) { + uint32_t cpu = smp_this_cpu_id(); + bool has_rt = (cpu < MAX_RENDER_CPUS && g_render_target[cpu]); + if (g_clip_enabled && !has_rt) { if (x + 8 <= g_clip_x || x >= g_clip_x + g_clip_w || y + 8 <= g_clip_y || y >= g_clip_y + g_clip_h) { return; @@ -484,7 +494,9 @@ void draw_char_bitmap(int x, int y, char c, uint32_t color) { unsigned char uc = (unsigned char)c; if (uc > 127) return; - if (g_clip_enabled && !g_render_target) { + uint32_t cpu = smp_this_cpu_id(); + bool has_rt = (cpu < MAX_RENDER_CPUS && g_render_target[cpu]); + if (g_clip_enabled && !has_rt) { if (x + 8 <= g_clip_x || x >= g_clip_x + g_clip_w || y + 8 <= g_clip_y || y >= g_clip_y + g_clip_h) { return; diff --git a/src/wm/wm.h b/src/wm/wm.h index b246bb7..a24494a 100644 --- a/src/wm/wm.h +++ b/src/wm/wm.h @@ -6,6 +6,7 @@ #include #include +#include "../sys/spinlock.h" uint64_t 
wm_lock_acquire(void); void wm_lock_release(uint64_t flags); @@ -55,6 +56,7 @@ struct Window { uint32_t *pixels; uint32_t *comp_pixels; void *font; + spinlock_t lock; // Callbacks void (*paint)(Window *win);