cpu_thread_new_kstack: Introduce new MD callback

Previously, the cpu_thread_alloc callback was invoked each time a
kernel stack was allocated for a thread.  This included thread
creation, but it was also invoked if a recycled thread had to allocate
a new kstack.  This means that cpu_thread_alloc could be called
multiple times for a single thread, but cpu_thread_free is only called
once.  Not only that, but the cpu_thread_alloc callback can't tell
whether it is being invoked on a new thread object or a recycled one.
Calling *_alloc multiple times on an object is also atypical for
kernel APIs.

As a result of this confusion, amd64 was potentially leaking an XSAVE
buffer each time a new kstack was allocated for an existing thread,
since cpu_thread_alloc for amd64 always allocated a new XSAVE buffer.
In practice, this edge case is probably rare: it requires a process
object to be recycled where either the new or the old process is a
kernel process with a non-default kernel stack size.

Nevertheless, to ease the confusion, redefine cpu_thread_alloc to only
be called once when a new thread is allocated.  The new callback,
cpu_thread_new_kstack, is invoked each time a kstack is allocated for a
thread, including both at thread creation time and if a recycled
thread allocates a new kstack.  The new callback should set any fields
whose value is dependent on td_kstack (e.g. the user frame in
td_frame, or td_pcb if the PCB is allocated on the kstack).

Reviewed by:	kib, andrew (arm changes)
Sponsored by:	AFRL, DARPA
Pull Request:	https://ron-dev.freebsd.org/FreeBSD/src/pulls/23
This commit is contained in:
John Baldwin
2026-03-28 13:56:24 +00:00
parent 59d67f2868
commit 2893bb741b
10 changed files with 55 additions and 13 deletions
+1
View File
@@ -943,6 +943,7 @@ MLINKS+=cpu_machdep.9 cpu_copy_thread.9 \
cpu_machdep.9 cpu_thread_clean.9 \ cpu_machdep.9 cpu_thread_clean.9 \
cpu_machdep.9 cpu_thread_exit.9 \ cpu_machdep.9 cpu_thread_exit.9 \
cpu_machdep.9 cpu_thread_free.9 \ cpu_machdep.9 cpu_thread_free.9 \
cpu_machdep.9 cpu_thread_new_kstack.9 \
cpu_machdep.9 cpu_throw.9 \ cpu_machdep.9 cpu_throw.9 \
cpu_machdep.9 cpu_update_pcb.9 cpu_machdep.9 cpu_update_pcb.9
MLINKS+=cpuset.9 CPUSET_T_INITIALIZER.9 \ MLINKS+=cpuset.9 CPUSET_T_INITIALIZER.9 \
+22 -6
View File
@@ -8,7 +8,7 @@
.\" Technology), and Capabilities Limited under Defense Advanced Research .\" Technology), and Capabilities Limited under Defense Advanced Research
.\" Projects Agency (DARPA) Contract No. FA8750-24-C-B047 ("DEC"). .\" Projects Agency (DARPA) Contract No. FA8750-24-C-B047 ("DEC").
.\" .\"
.Dd January 31, 2025 .Dd May 27, 2026
.Dt cpu_machdep 9 .Dt cpu_machdep 9
.Os .Os
.Sh NAME .Sh NAME
@@ -31,6 +31,7 @@
.Nm cpu_thread_clean , .Nm cpu_thread_clean ,
.Nm cpu_thread_exit , .Nm cpu_thread_exit ,
.Nm cpu_thread_free , .Nm cpu_thread_free ,
.Nm cpu_thread_new_kstack ,
.Nm cpu_throw , .Nm cpu_throw ,
.Nm cpu_update_pcb .Nm cpu_update_pcb
.Nd machine-dependent interfaces to handle CPU and thread state .Nd machine-dependent interfaces to handle CPU and thread state
@@ -84,6 +85,8 @@
.Ft void .Ft void
.Fn cpu_thread_free "struct thread *td" .Fn cpu_thread_free "struct thread *td"
.Ft void .Ft void
.Fn cpu_thread_new_kstack "struct thread *td"
.Ft void
.Fn cpu_throw "struct thread *old" "struct thread *new" .Fn cpu_throw "struct thread *old" "struct thread *new"
.Ft void .Ft void
.Fn cpu_update_pcb "struct thread *td" .Fn cpu_update_pcb "struct thread *td"
@@ -366,19 +369,25 @@ When the process object is later reused for a new process in
.Xr fork 2 , .Xr fork 2 ,
the kernel recycles that last thread object and uses it as the initial the kernel recycles that last thread object and uses it as the initial
thread in the new process. thread in the new process.
When a thread is recycled, some of the steps in the thread allocation When a thread is recycled, a new kernel stack may be allocated if
and free cycle are skipped as an optimization. the existing kernel stack is not suitable for the new process.
.Pp .Pp
.Fn cpu_thread_alloc .Fn cpu_thread_alloc
initializes machine-dependent fields in initializes machine-dependent fields in
.Fa td .Fa td
when allocating a new thread object.
.Pp
.Fn cpu_thread_new_kstack
initializes kernel stack-related machine-dependent fields in
.Fa td
after allocating a new kernel stack. after allocating a new kernel stack.
This function typically sets the This function typically sets the
.Fa td_pcb .Fa td_pcb
.Pq on architectures which store the pcb in the kernel stack
and initial and initial
.Fa td_frame .Fa td_frame
pointers. pointers.
.Fn cpu_thread_alloc .Fn cpu_thread_new_kstack
is called both when allocating a new thread object and is called both when allocating a new thread object and
when a recycled thread allocates a new kernel stack. when a recycled thread allocates a new kernel stack.
Note that this function is Note that this function is
@@ -386,12 +395,19 @@ Note that this function is
called if a recycled thread reuses its existing kernel stack. called if a recycled thread reuses its existing kernel stack.
.Pp .Pp
.Fn cpu_thread_clean .Fn cpu_thread_clean
releases any machine-dependent resources for the last thread in a releases machine-dependent resources for the last thread in a
process during process during
.Xr wait 2 . .Xr wait 2 .
The thread is a candidate for recycling so should be reset to run as a Since the thread is a candidate for recycling,
machine-dependent fields should be reset to run as a
new thread in case it is recycled by a future new thread in case it is recycled by a future
.Xr fork 2 . .Xr fork 2 .
In particular,
if the thread reuses its existing kernel stack,
no other
.Fn cpu_thread_*
function will be invoked before the thread is reused as the main
thread of a new process.
.Pp .Pp
.Fn cpu_thread_exit .Fn cpu_thread_exit
cleans any machine-dependent state in cleans any machine-dependent state in
+7 -2
View File
@@ -376,13 +376,18 @@ cpu_thread_alloc(struct thread *td)
{ {
struct pcb *pcb; struct pcb *pcb;
set_top_of_stack_td(td);
td->td_pcb = pcb = get_pcb_td(td); td->td_pcb = pcb = get_pcb_td(td);
td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1;
td->td_md.md_usr_fpu_save = fpu_save_area_alloc(); td->td_md.md_usr_fpu_save = fpu_save_area_alloc();
pcb->pcb_save = get_pcb_user_save_pcb(pcb); pcb->pcb_save = get_pcb_user_save_pcb(pcb);
} }
void
cpu_thread_new_kstack(struct thread *td)
{
set_top_of_stack_td(td);
td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1;
}
void void
cpu_thread_free(struct thread *td) cpu_thread_free(struct thread *td)
{ {
+5
View File
@@ -240,6 +240,11 @@ cpu_thread_exit(struct thread *td)
void void
cpu_thread_alloc(struct thread *td) cpu_thread_alloc(struct thread *td)
{
}
void
cpu_thread_new_kstack(struct thread *td)
{ {
td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages *
PAGE_SIZE) - 1; PAGE_SIZE) - 1;
+5 -1
View File
@@ -260,12 +260,16 @@ cpu_thread_exit(struct thread *td)
void void
cpu_thread_alloc(struct thread *td) cpu_thread_alloc(struct thread *td)
{ {
ptrauth_thread_alloc(td);
}
void
cpu_thread_new_kstack(struct thread *td)
{
td->td_pcb = (struct pcb *)(td->td_kstack + td->td_pcb = (struct pcb *)(td->td_kstack +
td->td_kstack_pages * PAGE_SIZE) - 1; td->td_kstack_pages * PAGE_SIZE) - 1;
td->td_frame = (struct trapframe *)STACKALIGN( td->td_frame = (struct trapframe *)STACKALIGN(
(struct trapframe *)td->td_pcb - 1); (struct trapframe *)td->td_pcb - 1);
ptrauth_thread_alloc(td);
} }
void void
+5
View File
@@ -371,6 +371,11 @@ cpu_thread_clean(struct thread *td)
void void
cpu_thread_alloc(struct thread *td) cpu_thread_alloc(struct thread *td)
{
}
void
cpu_thread_new_kstack(struct thread *td)
{ {
struct pcb *pcb; struct pcb *pcb;
+2 -1
View File
@@ -803,6 +803,7 @@ thread_alloc(int pages)
kasan_thread_alloc(td); kasan_thread_alloc(td);
kmsan_thread_alloc(td); kmsan_thread_alloc(td);
cpu_thread_alloc(td); cpu_thread_alloc(td);
cpu_thread_new_kstack(td);
EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td); EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td);
return (td); return (td);
} }
@@ -815,7 +816,7 @@ thread_recycle(struct thread *td, int pages)
vm_thread_dispose(td); vm_thread_dispose(td);
if (!vm_thread_new(td, pages)) if (!vm_thread_new(td, pages))
return (ENOMEM); return (ENOMEM);
cpu_thread_alloc(td); cpu_thread_new_kstack(td);
} }
kasan_thread_alloc(td); kasan_thread_alloc(td);
kmsan_thread_alloc(td); kmsan_thread_alloc(td);
+5
View File
@@ -1079,6 +1079,11 @@ cpu_thread_clean(struct thread *td)
void void
cpu_thread_alloc(struct thread *td) cpu_thread_alloc(struct thread *td)
{
}
void
cpu_thread_new_kstack(struct thread *td)
{ {
struct pcb *pcb; struct pcb *pcb;
+2 -3
View File
@@ -58,8 +58,8 @@
#define TP_OFFSET 16 /* sizeof(struct tcb) */ #define TP_OFFSET 16 /* sizeof(struct tcb) */
#endif #endif
static void void
cpu_set_pcb_frame(struct thread *td) cpu_thread_new_kstack(struct thread *td)
{ {
td->td_pcb = (struct pcb *)(td->td_kstack + td->td_pcb = (struct pcb *)(td->td_kstack +
td->td_kstack_pages * PAGE_SIZE) - 1; td->td_kstack_pages * PAGE_SIZE) - 1;
@@ -228,7 +228,6 @@ cpu_thread_exit(struct thread *td)
void void
cpu_thread_alloc(struct thread *td) cpu_thread_alloc(struct thread *td)
{ {
cpu_set_pcb_frame(td);
} }
void void
+1
View File
@@ -1246,6 +1246,7 @@ void cpu_thread_alloc(struct thread *);
void cpu_thread_clean(struct thread *); void cpu_thread_clean(struct thread *);
void cpu_thread_exit(struct thread *); void cpu_thread_exit(struct thread *);
void cpu_thread_free(struct thread *); void cpu_thread_free(struct thread *);
void cpu_thread_new_kstack(struct thread *);
struct thread *thread_alloc(int pages); struct thread *thread_alloc(int pages);
int thread_check_susp(struct thread *td, bool sleep); int thread_check_susp(struct thread *td, bool sleep);
void thread_cow_get_proc(struct thread *newtd, struct proc *p); void thread_cow_get_proc(struct thread *newtd, struct proc *p);