From d0b10df718e3560cd041ba9ddb157591ac7fa886 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Wed, 25 Mar 2026 15:18:05 +0000 Subject: [PATCH] arm64: Move pcb out of kstack into a new UMA zone This is similar to commit 5e921ff49ea8bb70506248a4675894729cdad8c2 which moved the pcb for amd64, but a bit different. arm64's pcb is much larger (over 1KB!) than amd64's since it still embeds FP registers. Moving the pcb out of the kstack frees up that much additional kstack space. Unlike amd64 however, embedding the pcb in struct mdthread is not practical as the resulting struct thread would grow such that UMA would now store 1 thread per 4k page instead of 2 threads per page. By using a separate UMA zone for pcbs, 2 struct threads can continue to fit in a single 4k page, and 3 pcbs can fit in another 4k page. Reviewed by: kib, jrtc27, andrew Sponsored by: AFRL, DARPA Pull Request: https://ron-dev.freebsd.org/FreeBSD/src/pulls/23 --- sys/arm64/arm64/genassym.c | 2 -- sys/arm64/arm64/locore.S | 3 +-- sys/arm64/arm64/machdep.c | 3 ++- sys/arm64/arm64/vm_machdep.c | 18 ++++++++++++++---- sys/arm64/include/stack.h | 6 ++---- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/sys/arm64/arm64/genassym.c b/sys/arm64/arm64/genassym.c index 22696796e69..b54f446e83b 100644 --- a/sys/arm64/arm64/genassym.c +++ b/sys/arm64/arm64/genassym.c @@ -53,8 +53,6 @@ ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(PC_SSBD, offsetof(struct pcpu, pc_ssbd)); -/* Size of pcb, rounded to keep stack alignment */ -ASSYM(PCB_SIZE, roundup2(sizeof(struct pcb), STACKALIGNBYTES + 1)); ASSYM(PCB_SINGLE_STEP_SHIFT, PCB_SINGLE_STEP_SHIFT); ASSYM(PCB_REGS, offsetof(struct pcb, pcb_x)); ASSYM(PCB_X19, PCB_X19); diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S index b200aa93c28..bd61b485edf 100644 --- a/sys/arm64/arm64/locore.S +++ b/sys/arm64/arm64/locore.S @@ -128,8 +128,7 @@ virtdone: /* Set up the stack */ adrp x25, initstack_end - add x25, x25, :lo12:initstack_end - sub sp, x25, #PCB_SIZE + add sp, x25, :lo12:initstack_end /* Zero the BSS */ ldr x15, .Lbss diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c index 6b6dd2c510a..d33208a0819 100644 --- a/sys/arm64/arm64/machdep.c +++ b/sys/arm64/arm64/machdep.c @@ -131,6 +131,7 @@ uintptr_t boot_canary = 0x49a2d892bc05a0b1ul; #endif static struct trapframe proc0_tf; +static struct pcb pcb0; int early_boot = 1; int cold = 1; @@ -443,7 +444,7 @@ init_proc0(void *kstack) #if defined(PERTHREAD_SSP) thread0.td_md.md_canary = boot_canary; #endif - thread0.td_pcb = (struct pcb *)td_kstack_top(&thread0) - 1; + thread0.td_pcb = &pcb0; thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_fpflags = 0; thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate; diff --git a/sys/arm64/arm64/vm_machdep.c b/sys/arm64/arm64/vm_machdep.c index 29a1c8667e7..3789dd318c4 100644 --- a/sys/arm64/arm64/vm_machdep.c +++ b/sys/arm64/arm64/vm_machdep.c @@ -27,8 +27,8 @@ #include "opt_platform.h" -#include #include +#include #include #include #include @@ -61,6 +61,8 @@ */ cpu_reset_hook_t cpu_reset_hook = psci_reset; +static uma_zone_t pcb_zone; + /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child @@ -260,20 +262,20 @@ cpu_thread_exit(struct thread *td) void cpu_thread_alloc(struct thread *td) { + td->td_pcb = uma_zalloc(pcb_zone, M_WAITOK); ptrauth_thread_alloc(td); } void cpu_thread_new_kstack(struct thread *td) { - td->td_pcb = (struct pcb *)td_kstack_top(td) - 1; - td->td_frame = (struct trapframe *)STACKALIGN( - (struct trapframe *)td->td_pcb - 1); + td->td_frame = (struct trapframe *)td_kstack_top(td) - 1; } void cpu_thread_free(struct thread *td) { + uma_zfree(pcb_zone, td->td_pcb); } void @@ -333,3 +335,11 @@ cpu_sync_core(void) * return from ELx is a context synchronization event. */ } + +static void +pcbinit(void *dummy __unused) +{ + pcb_zone = uma_zcreate("pcb", sizeof(struct pcb), NULL, NULL, NULL, + NULL, UMA_ALIGNOF(struct pcb), 0); +} +SYSINIT(pcbinit, SI_SUB_INTRINSIC, SI_ORDER_ANY, pcbinit, NULL); diff --git a/sys/arm64/include/stack.h b/sys/arm64/include/stack.h index 0aa483e15e6..23e7a5af27d 100644 --- a/sys/arm64/include/stack.h +++ b/sys/arm64/include/stack.h @@ -39,11 +39,9 @@ bool unwind_frame(struct thread *, struct unwind_state *); #ifdef _SYS_PROC_H_ -#include - #define GET_STACK_USAGE(total, used) do { \ struct thread *td = curthread; \ - (total) = ptoa(td->td_kstack_pages) - sizeof(struct pcb); \ + (total) = ptoa(td->td_kstack_pages); \ (used) = td->td_kstack + (total) - (char *)&td; \ } while (0) @@ -51,7 +49,7 @@ static __inline bool kstack_contains(struct thread *td, vm_offset_t va, size_t len) { return (va >= (vm_offset_t)td->td_kstack && va + len >= va && - va + len <= (vm_offset_t)td_kstack_top(td) - sizeof(struct pcb)); + va + len <= (vm_offset_t)td_kstack_top(td)); } #endif /* _SYS_PROC_H_ */