diff --git a/lib/libsys/kqueue.2 b/lib/libsys/kqueue.2 index aafb5317c5e..96c9b0222a3 100644 --- a/lib/libsys/kqueue.2 +++ b/lib/libsys/kqueue.2 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd September 11, 2025 +.Dd September 12, 2025 .Dt KQUEUE 2 .Os .Sh NAME @@ -638,6 +638,19 @@ or .Dv NOTE_JAIL_CHILD event has been received since the last call to .Fn kevent . +.It Dv EVFILT_JAILDESC +Takes a jail descriptor returned by +.Xr jail_set 2 +or +.Xr jail_get 2 +as the identifier and the events to watch for in +.Va fflags , +and returns when the jail performs one or more of the requested events. +The events to monitor and the resulting +.Va fflags +are the same as those listed in +.Dv EVFILT_JAIL , +above. .It Dv EVFILT_TIMER Establishes an arbitrary timer identified by .Va ident . diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 59a69ccddb3..c14ec398cb0 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -376,6 +377,7 @@ static struct { [~EVFILT_SENDFILE] = { &null_filtops }, [~EVFILT_EMPTY] = { &file_filtops, 1 }, [~EVFILT_JAIL] = { &jail_filtops, 1 }, + [~EVFILT_JAILDESC] = { &file_filtops, 1 }, }; /* @@ -682,15 +684,15 @@ filt_jail(struct knote *kn, long hint) (u_int)hint & NOTE_JAIL_CTRLMASK; /* If the user is interested in this event, record it. */ - if (kn->kn_sfflags & event) + if (kn->kn_sfflags & event) { kn->kn_fflags |= event; - - /* Report the created jail id or attached process id. */ - if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) { - if (kn->kn_data != 0) - kn->kn_fflags |= NOTE_JAIL_MULTI; - kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U : - (u_int)hint & ~event; + /* Report the created jail id or attached process id. */ + if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) { + if (kn->kn_data != 0) + kn->kn_fflags |= NOTE_JAIL_MULTI; + kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U : + (u_int)hint & ~event; + } } /* Prison is gone, so flag the event as finished. */ diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index d90ccf4a04c..43035dc009b 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -5371,6 +5371,7 @@ prison_knote(struct prison *pr, long hint) if (!locked) mtx_lock(&pr->pr_mtx); KNOTE_LOCKED(pr->pr_klist, hint); + jaildesc_knote(pr, hint); if (!locked) mtx_unlock(&pr->pr_mtx); } diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c index c9e80f5d894..3f322b27140 100644 --- a/sys/kern/kern_jaildesc.c +++ b/sys/kern/kern_jaildesc.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,8 @@ MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors"); +static fo_poll_t jaildesc_poll; +static fo_kqfilter_t jaildesc_kqfilter; static fo_stat_t jaildesc_stat; static fo_close_t jaildesc_close; static fo_fill_kinfo_t jaildesc_fill_kinfo; @@ -56,8 +59,8 @@ static struct fileops jaildesc_ops = { .fo_write = invfo_rdwr, .fo_truncate = invfo_truncate, .fo_ioctl = invfo_ioctl, - .fo_poll = invfo_poll, - .fo_kqfilter = invfo_kqfilter, + .fo_poll = jaildesc_poll, + .fo_kqfilter = jaildesc_kqfilter, .fo_stat = jaildesc_stat, .fo_close = jaildesc_close, .fo_chmod = invfo_chmod, @@ -135,6 +138,7 @@ jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning) finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0 ? FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops); JAILDESC_LOCK_INIT(jd); + knlist_init_mtx(&jd->jd_selinfo.si_note, &jd->jd_lock); if (owning) jd->jd_flags |= JDF_OWNING; *fpp = fp; @@ -176,6 +180,36 @@ jaildesc_prison_cleanup(struct prison *pr) } } +/* + * Pass a note to all listening kqueues. + */ +void +jaildesc_knote(struct prison *pr, long hint) +{ + struct jaildesc *jd; + int prison_locked; + + if (!LIST_EMPTY(&pr->pr_descs)) { + prison_locked = mtx_owned(&pr->pr_mtx); + if (!prison_locked) + prison_lock(pr); + LIST_FOREACH(jd, &pr->pr_descs, jd_list) { + JAILDESC_LOCK(jd); + if (hint == NOTE_JAIL_REMOVE) { + jd->jd_flags |= JDF_REMOVED; + if (jd->jd_flags & JDF_SELECTED) { + jd->jd_flags &= ~JDF_SELECTED; + selwakeup(&jd->jd_selinfo); + } + } + KNOTE_LOCKED(&jd->jd_selinfo.si_note, hint); + JAILDESC_UNLOCK(jd); + } + if (!prison_locked) + prison_unlock(pr); + } +} + static int jaildesc_close(struct file *fp, struct thread *td) { @@ -223,12 +257,112 @@ jaildesc_close(struct file *fp, struct thread *td) } prison_free(pr); } + knlist_destroy(&jd->jd_selinfo.si_note); JAILDESC_LOCK_DESTROY(jd); free(jd, M_JAILDESC); } return (0); } +static int +jaildesc_poll(struct file *fp, int events, struct ucred *active_cred, + struct thread *td) +{ + struct jaildesc *jd; + int revents; + + revents = 0; + jd = fp->f_data; + JAILDESC_LOCK(jd); + if (jd->jd_flags & JDF_REMOVED) + revents |= POLLHUP; + if (revents == 0) { + selrecord(td, &jd->jd_selinfo); + jd->jd_flags |= JDF_SELECTED; + } + JAILDESC_UNLOCK(jd); + return (revents); +} + +static void +jaildesc_kqops_detach(struct knote *kn) +{ + struct jaildesc *jd; + + jd = kn->kn_fp->f_data; + knlist_remove(&jd->jd_selinfo.si_note, kn, 0); +} + +static int +jaildesc_kqops_event(struct knote *kn, long hint) +{ + struct jaildesc *jd; + u_int event; + + jd = kn->kn_fp->f_data; + if (hint == 0) { + /* + * Initial test after registration. Generate a + * NOTE_JAIL_REMOVE in case the prison already died + * before registration. + */ + event = jd->jd_flags & JDF_REMOVED ? NOTE_JAIL_REMOVE : 0; + } else { + /* + * Mask off extra data. In the NOTE_JAIL_CHILD case, + * that's everything except the NOTE_JAIL_CHILD bit + * itself, since a JID is any positive integer. + */ + event = ((u_int)hint & NOTE_JAIL_CHILD) ? NOTE_JAIL_CHILD : + (u_int)hint & NOTE_JAIL_CTRLMASK; + } + + /* If the user is interested in this event, record it. */ + if (kn->kn_sfflags & event) { + kn->kn_fflags |= event; + /* Report the created jail id or attached process id. */ + if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) { + if (kn->kn_data != 0) + kn->kn_fflags |= NOTE_JAIL_MULTI; + kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U : + (u_int)hint & ~event; + } + } + + /* Prison is gone, so flag the event as finished. */ + if (event == NOTE_JAIL_REMOVE) { + kn->kn_flags |= EV_EOF | EV_ONESHOT; + if (kn->kn_fflags == 0) + kn->kn_flags |= EV_DROP; + return (1); + } + + return (kn->kn_fflags != 0); +} + +static const struct filterops jaildesc_kqops = { + .f_isfd = 1, + .f_detach = jaildesc_kqops_detach, + .f_event = jaildesc_kqops_event, +}; + +static int +jaildesc_kqfilter(struct file *fp, struct knote *kn) +{ + struct jaildesc *jd; + + jd = fp->f_data; + switch (kn->kn_filter) { + case EVFILT_JAILDESC: + kn->kn_fop = &jaildesc_kqops; + kn->kn_flags |= EV_CLEAR; + knlist_add(&jd->jd_selinfo.si_note, kn, 0); + return (0); + default: + return (EINVAL); + } +} + static int jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred) { diff --git a/sys/sys/event.h b/sys/sys/event.h index 91fbaa4834f..084eaafcbdc 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -46,7 +46,8 @@ #define EVFILT_SENDFILE (-12) /* attached to sendfile requests */ #define EVFILT_EMPTY (-13) /* empty send socket buf */ #define EVFILT_JAIL (-14) /* attached to struct prison */ -#define EVFILT_SYSCOUNT 14 +#define EVFILT_JAILDESC (-15) /* attached to jail descriptors */ +#define EVFILT_SYSCOUNT 15 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define EV_SET(kevp_, a, b, c, d, e, f) do { \ @@ -210,7 +211,7 @@ struct freebsd11_kevent32 { #define NOTE_TRACKERR 0x00000002 /* could not track child */ #define NOTE_CHILD 0x00000004 /* am a child process */ -/* data/hint flags for EVFILT_JAIL */ +/* data/hint flags for EVFILT_JAIL and EVFILT_JAILDESC */ #define NOTE_JAIL_CHILD 0x80000000 /* child jail was created */ #define NOTE_JAIL_SET 0x40000000 /* jail was modified */ #define NOTE_JAIL_ATTACH 0x20000000 /* jail was attached to */ diff --git a/sys/sys/jaildesc.h b/sys/sys/jaildesc.h index 2451b04f730..fda270d62e7 100644 --- a/sys/sys/jaildesc.h +++ b/sys/sys/jaildesc.h @@ -35,6 +35,7 @@ #ifdef _KERNEL #include +#include #include #include #include @@ -54,6 +55,7 @@ struct jaildesc { LIST_ENTRY(jaildesc) jd_list; /* (d,p) this prison's descs */ struct prison *jd_prison; /* (d) the prison */ struct mtx jd_lock; + struct selinfo jd_selinfo; /* (d) event notification */ unsigned jd_flags; /* (d) JDF_* flags */ }; @@ -69,6 +71,7 @@ struct jaildesc { /* * Flags for the jd_flags field */ +#define JDF_SELECTED 0x00000001 /* issue selwakeup() */ #define JDF_REMOVED 0x00000002 /* jail was removed */ #define JDF_OWNING 0x00000004 /* closing descriptor removes jail */ @@ -77,6 +80,7 @@ int jaildesc_find(struct thread *td, int fd, struct prison **prp, int jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning); void jaildesc_set_prison(struct file *jd, struct prison *pr); void jaildesc_prison_cleanup(struct prison *pr); +void jaildesc_knote(struct prison *pr, long hint); #endif /* _KERNEL */