jail: simplify EVFILT_JAIL events
Instead of using the EVFILT_PROC model of attempting to automatically register new events when a child jail is created, just give a single event when a child jail is created. As was already done with jail attach events, make a best-effort report of the added jail's id in kn_data. If there are multiple NOTE_JAIL_CHILD and/or NOTE_JAIL_ATTACH events, set the NOTE_JAIL_MULTI flag, and don't report anything in data, indicating that the caller will need to query the system state on their own. MFC after: 3 days
This commit is contained in:
+15
-26
@@ -22,7 +22,7 @@
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.Dd September 4, 2025
|
||||
.Dd September 11, 2025
|
||||
.Dt KQUEUE 2
|
||||
.Os
|
||||
.Sh NAME
|
||||
@@ -614,41 +614,30 @@ The process ID will be stored in
|
||||
If more than one process has attached since the last call to
|
||||
.Fn kevent ,
|
||||
.Va data
|
||||
will contain the most recently attached process ID,
|
||||
with
|
||||
.Dv NOTE_JAIL_ATTACH_MULTI
|
||||
set in
|
||||
.Va fflags .
|
||||
will be zero.
|
||||
.It Dv NOTE_JAIL_REMOVE
|
||||
The jail has been removed.
|
||||
.It Dv NOTE_JAIL_CHILD
|
||||
A child of the watched jail has been created.
|
||||
.It Dv NOTE_TRACK
|
||||
Follow child jails created under this jail.
|
||||
Register a new kevent to monitor the child jail using the same
|
||||
.Va fflags
|
||||
as the original event.
|
||||
The child jail will signal an event with
|
||||
.Dv NOTE_CHILD
|
||||
set in
|
||||
.Va fflags
|
||||
and the parent JID in
|
||||
Its jail ID will be stored in
|
||||
.Va data .
|
||||
.Pp
|
||||
If registering a new kevent fails
|
||||
.Pq usually due to resource limitations ,
|
||||
it will signal an event with
|
||||
.Dv NOTE_TRACKERR
|
||||
set in
|
||||
.Va fflags ,
|
||||
and the child jail will not signal a
|
||||
.Dv NOTE_CHILD
|
||||
event.
|
||||
If more than one jail has been created since the last call to
|
||||
.Fn kevent ,
|
||||
.Va data
|
||||
will be zero.
|
||||
.El
|
||||
.Pp
|
||||
On return,
|
||||
.Va fflags
|
||||
contains the events which triggered the filter.
|
||||
It will also contain
|
||||
.Dv NOTE_JAIL_MULTI
|
||||
if more than one
|
||||
.Dv NOTE_JAIL_ATTACH
|
||||
or
|
||||
.Dv NOTE_JAIL_CHILD
|
||||
event has been received since the last call to
|
||||
.Fn kevent .
|
||||
.It Dv EVFILT_TIMER
|
||||
Establishes an arbitrary timer identified by
|
||||
.Va ident .
|
||||
|
||||
+15
-56
@@ -539,8 +539,7 @@ filt_proc(struct knote *kn, long hint)
|
||||
* process forked. Additionally, for each knote attached to the
|
||||
* parent, check whether user wants to track the new process. If so
|
||||
* attach a new knote to it, and immediately report an event with the
|
||||
* child's pid. This is also called on jail creation, which is treated
|
||||
* the same way by jail events.
|
||||
* child's pid.
|
||||
*/
|
||||
void
|
||||
knote_fork(struct knlist *list, int pid)
|
||||
@@ -567,8 +566,6 @@ knote_fork(struct knlist *list, int pid)
|
||||
/*
|
||||
* The same as knote(), activate the event.
|
||||
*/
|
||||
_Static_assert(NOTE_JAIL_CHILD == NOTE_FORK,
|
||||
"NOTE_JAIL_CHILD should be the same as NOTE_FORK");
|
||||
if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
|
||||
if (kn->kn_fop->f_event(kn, NOTE_FORK))
|
||||
KNOTE_ACTIVATE(kn, 1);
|
||||
@@ -632,30 +629,11 @@ int
|
||||
filt_jailattach(struct knote *kn)
|
||||
{
|
||||
struct prison *pr;
|
||||
bool immediate;
|
||||
|
||||
immediate = false;
|
||||
if (kn->kn_id == 0) {
|
||||
/* Let jid=0 watch the current prison (including prison0). */
|
||||
pr = curthread->td_ucred->cr_prison;
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
} else if (kn->kn_flags & (EV_FLAG1 | EV_FLAG2)) {
|
||||
/*
|
||||
* The kernel registers prisons before they are valid,
|
||||
* so prison_find_child will fail.
|
||||
*/
|
||||
TAILQ_FOREACH(pr, &allprison, pr_list) {
|
||||
if (pr->pr_id < kn->kn_id)
|
||||
continue;
|
||||
if (pr->pr_id > kn->kn_id) {
|
||||
pr = NULL;
|
||||
break;
|
||||
}
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
break;
|
||||
}
|
||||
if (pr == NULL)
|
||||
return (ENOENT);
|
||||
} else {
|
||||
sx_slock(&allprison_lock);
|
||||
pr = prison_find_child(curthread->td_ucred->cr_prison,
|
||||
@@ -670,32 +648,7 @@ filt_jailattach(struct knote *kn)
|
||||
}
|
||||
kn->kn_ptr.p_prison = pr;
|
||||
kn->kn_flags |= EV_CLEAR;
|
||||
|
||||
/*
|
||||
* Internal flag indicating registration done by kernel for the
|
||||
* purposes of getting a NOTE_CHILD notification.
|
||||
*/
|
||||
if (kn->kn_flags & EV_FLAG2) {
|
||||
kn->kn_flags &= ~EV_FLAG2;
|
||||
kn->kn_data = kn->kn_sdata; /* parent id */
|
||||
kn->kn_fflags = NOTE_CHILD;
|
||||
kn->kn_sfflags &= ~NOTE_JAIL_CTRLMASK;
|
||||
immediate = true; /* Force immediate activation of child note. */
|
||||
}
|
||||
/*
|
||||
* Internal flag indicating registration done by kernel (for other than
|
||||
* NOTE_CHILD).
|
||||
*/
|
||||
if (kn->kn_flags & EV_FLAG1) {
|
||||
kn->kn_flags &= ~EV_FLAG1;
|
||||
}
|
||||
|
||||
knlist_add(pr->pr_klist, kn, 1);
|
||||
|
||||
/* Immediately activate any child notes. */
|
||||
if (immediate)
|
||||
KNOTE_ACTIVATE(kn, 0);
|
||||
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
return (0);
|
||||
}
|
||||
@@ -720,18 +673,24 @@ filt_jail(struct knote *kn, long hint)
|
||||
if (pr == NULL) /* already activated, from attach filter */
|
||||
return (0);
|
||||
|
||||
/* Mask off extra data. */
|
||||
event = (u_int)hint & NOTE_JAIL_CTRLMASK;
|
||||
/*
|
||||
* Mask off extra data. In the NOTE_JAIL_CHILD case, that's
|
||||
* everything except the NOTE_JAIL_CHILD bit itself, since a
|
||||
* JID is any positive integer.
|
||||
*/
|
||||
event = ((u_int)hint & NOTE_JAIL_CHILD) ? NOTE_JAIL_CHILD :
|
||||
(u_int)hint & NOTE_JAIL_CTRLMASK;
|
||||
|
||||
/* If the user is interested in this event, record it. */
|
||||
if (kn->kn_sfflags & event)
|
||||
kn->kn_fflags |= event;
|
||||
|
||||
/* Report the attached process id. */
|
||||
if (event == NOTE_JAIL_ATTACH) {
|
||||
/* Report the created jail id or attached process id. */
|
||||
if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
|
||||
if (kn->kn_data != 0)
|
||||
kn->kn_fflags |= NOTE_JAIL_ATTACH_MULTI;
|
||||
kn->kn_data = hint & NOTE_JAIL_DATAMASK;
|
||||
kn->kn_fflags |= NOTE_JAIL_MULTI;
|
||||
kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
|
||||
(u_int)hint & ~event;
|
||||
}
|
||||
|
||||
/* Prison is gone, so flag the event as finished. */
|
||||
@@ -1729,8 +1688,8 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td,
|
||||
/*
|
||||
* If possible, find an existing knote to use for this kevent.
|
||||
*/
|
||||
if ((kev->filter == EVFILT_PROC || kev->filter == EVFILT_JAIL)
|
||||
&& (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) {
|
||||
if (kev->filter == EVFILT_PROC &&
|
||||
(kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) {
|
||||
/* This is an internal creation of a process tracking
|
||||
* note. Don't attempt to coalesce this with an
|
||||
* existing note.
|
||||
|
||||
@@ -2221,9 +2221,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
*/
|
||||
if (created) {
|
||||
sx_assert(&allprison_lock, SX_XLOCKED);
|
||||
mtx_lock(&ppr->pr_mtx);
|
||||
knote_fork(ppr->pr_klist, pr->pr_id);
|
||||
mtx_unlock(&ppr->pr_mtx);
|
||||
prison_knote(ppr, NOTE_JAIL_CHILD | pr->pr_id);
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
drflags |= PD_LOCKED;
|
||||
pr->pr_state = PRISON_STATE_ALIVE;
|
||||
|
||||
+10
-11
@@ -205,19 +205,18 @@ struct freebsd11_kevent32 {
|
||||
#define NOTE_PCTRLMASK 0xf0000000 /* mask for hint bits */
|
||||
#define NOTE_PDATAMASK 0x000fffff /* mask for pid */
|
||||
|
||||
/* data/hint flags for EVFILT_JAIL */
|
||||
#define NOTE_JAIL_SET 0x80000000 /* jail was modified */
|
||||
#define NOTE_JAIL_CHILD 0x40000000 /* child jail was created */
|
||||
#define NOTE_JAIL_ATTACH 0x20000000 /* jail was attached to */
|
||||
#define NOTE_JAIL_REMOVE 0x10000000 /* jail was removed */
|
||||
#define NOTE_JAIL_ATTACH_MULTI 0x08000000 /* multiple procs attached */
|
||||
#define NOTE_JAIL_CTRLMASK 0xf0000000 /* mask for hint bits */
|
||||
#define NOTE_JAIL_DATAMASK 0x000fffff /* mask for pid */
|
||||
|
||||
/* additional flags for EVFILT_PROC and EVFILT_JAIL */
|
||||
/* additional flags for EVFILT_PROC */
|
||||
#define NOTE_TRACK 0x00000001 /* follow across fork/create */
|
||||
#define NOTE_TRACKERR 0x00000002 /* could not track child */
|
||||
#define NOTE_CHILD 0x00000004 /* am a child process/jail */
|
||||
#define NOTE_CHILD 0x00000004 /* am a child process */
|
||||
|
||||
/* data/hint flags for EVFILT_JAIL */
|
||||
#define NOTE_JAIL_CHILD 0x80000000 /* child jail was created */
|
||||
#define NOTE_JAIL_SET 0x40000000 /* jail was modified */
|
||||
#define NOTE_JAIL_ATTACH 0x20000000 /* jail was attached to */
|
||||
#define NOTE_JAIL_REMOVE 0x10000000 /* jail was removed */
|
||||
#define NOTE_JAIL_MULTI 0x08000000 /* multiple child or attach */
|
||||
#define NOTE_JAIL_CTRLMASK 0xf0000000 /* mask for hint bits */
|
||||
|
||||
/* additional flags for EVFILT_TIMER */
|
||||
#define NOTE_SECONDS 0x00000001 /* data is seconds */
|
||||
|
||||
Reference in New Issue
Block a user