jaildesc: add kevent support

Give jail descriptors the same kevent flags as jails.  Also fix the
event reporting in jails, where it was including data for events the
user didn't ask for.

MFC after:	3 days
This commit is contained in:
Jamie Gritton
2025-09-12 11:33:19 -07:00
parent ab2fea3f9a
commit 66d8ffe304
6 changed files with 168 additions and 13 deletions
+14 -1
View File
@@ -22,7 +22,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.Dd September 11, 2025
.Dd September 12, 2025
.Dt KQUEUE 2
.Os
.Sh NAME
@@ -638,6 +638,19 @@ or
.Dv NOTE_JAIL_CHILD
event has been received since the last call to
.Fn kevent .
.It Dv EVFILT_JAILDESC
Takes a jail descriptor returned by
.Xr jail_set 2
or
.Xr jail_get 2
as the identifier and the events to watch for in
.Va fflags ,
and returns when the jail performs one or more of the requested events.
The events to monitor and the resulting
.Va fflags
are the same as those listed in
.Dv EVFILT_JAIL ,
above.
.It Dv EVFILT_TIMER
Establishes an arbitrary timer identified by
.Va ident .
+10 -8
View File
@@ -51,6 +51,7 @@
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/jaildesc.h>
#include <sys/kthread.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
@@ -376,6 +377,7 @@ static struct {
[~EVFILT_SENDFILE] = { &null_filtops },
[~EVFILT_EMPTY] = { &file_filtops, 1 },
[~EVFILT_JAIL] = { &jail_filtops, 1 },
[~EVFILT_JAILDESC] = { &file_filtops, 1 },
};
/*
@@ -682,15 +684,15 @@ filt_jail(struct knote *kn, long hint)
(u_int)hint & NOTE_JAIL_CTRLMASK;
/* If the user is interested in this event, record it. */
if (kn->kn_sfflags & event)
if (kn->kn_sfflags & event) {
kn->kn_fflags |= event;
/* Report the created jail id or attached process id. */
if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
if (kn->kn_data != 0)
kn->kn_fflags |= NOTE_JAIL_MULTI;
kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
(u_int)hint & ~event;
/* Report the created jail id or attached process id. */
if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
if (kn->kn_data != 0)
kn->kn_fflags |= NOTE_JAIL_MULTI;
kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
(u_int)hint & ~event;
}
}
/* Prison is gone, so flag the event as finished. */
+1
View File
@@ -5371,6 +5371,7 @@ prison_knote(struct prison *pr, long hint)
if (!locked)
mtx_lock(&pr->pr_mtx);
KNOTE_LOCKED(pr->pr_klist, hint);
jaildesc_knote(pr, hint);
if (!locked)
mtx_unlock(&pr->pr_mtx);
}
+136 -2
View File
@@ -36,6 +36,7 @@
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
@@ -46,6 +47,8 @@
MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors");
static fo_poll_t jaildesc_poll;
static fo_kqfilter_t jaildesc_kqfilter;
static fo_stat_t jaildesc_stat;
static fo_close_t jaildesc_close;
static fo_fill_kinfo_t jaildesc_fill_kinfo;
@@ -56,8 +59,8 @@ static struct fileops jaildesc_ops = {
.fo_write = invfo_rdwr,
.fo_truncate = invfo_truncate,
.fo_ioctl = invfo_ioctl,
.fo_poll = invfo_poll,
.fo_kqfilter = invfo_kqfilter,
.fo_poll = jaildesc_poll,
.fo_kqfilter = jaildesc_kqfilter,
.fo_stat = jaildesc_stat,
.fo_close = jaildesc_close,
.fo_chmod = invfo_chmod,
@@ -135,6 +138,7 @@ jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning)
finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0 ?
FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops);
JAILDESC_LOCK_INIT(jd);
knlist_init_mtx(&jd->jd_selinfo.si_note, &jd->jd_lock);
if (owning)
jd->jd_flags |= JDF_OWNING;
*fpp = fp;
@@ -176,6 +180,36 @@ jaildesc_prison_cleanup(struct prison *pr)
}
}
/*
 * Deliver a jail event to the knotes attached to each descriptor on the
 * prison's pr_descs list.  When the hint is exactly NOTE_JAIL_REMOVE,
 * each descriptor is also flagged JDF_REMOVED and any thread recorded
 * by jaildesc_poll() is woken.
 *
 * The prison mutex may or may not be held on entry; it is acquired
 * here only if the caller does not already own it, and is left in the
 * same state on return.
 */
void
jaildesc_knote(struct prison *pr, long hint)
{
	struct jaildesc *desc;
	int was_locked;

	/* No descriptors refer to this prison, so nobody is listening. */
	if (LIST_EMPTY(&pr->pr_descs))
		return;

	was_locked = mtx_owned(&pr->pr_mtx);
	if (!was_locked)
		prison_lock(pr);
	LIST_FOREACH(desc, &pr->pr_descs, jd_list) {
		JAILDESC_LOCK(desc);
		if (hint == NOTE_JAIL_REMOVE) {
			/* Record the removal so later polls report it. */
			desc->jd_flags |= JDF_REMOVED;
			if ((desc->jd_flags & JDF_SELECTED) != 0) {
				desc->jd_flags &= ~JDF_SELECTED;
				selwakeup(&desc->jd_selinfo);
			}
		}
		/* The descriptor lock is held here, hence the _LOCKED form. */
		KNOTE_LOCKED(&desc->jd_selinfo.si_note, hint);
		JAILDESC_UNLOCK(desc);
	}
	if (!was_locked)
		prison_unlock(pr);
}
static int
jaildesc_close(struct file *fp, struct thread *td)
{
@@ -223,12 +257,112 @@ jaildesc_close(struct file *fp, struct thread *td)
}
prison_free(pr);
}
knlist_destroy(&jd->jd_selinfo.si_note);
JAILDESC_LOCK_DESTROY(jd);
free(jd, M_JAILDESC);
}
return (0);
}
/*
 * Poll handler for jail descriptors.
 *
 * Returns POLLHUP once the descriptor has been flagged JDF_REMOVED.
 * Otherwise returns 0 after recording the calling thread for a later
 * selwakeup() and setting JDF_SELECTED so that jaildesc_knote() knows
 * a wakeup is wanted.  The requested events mask is not consulted.
 */
static int
jaildesc_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct jaildesc *desc;
	int ready;

	desc = fp->f_data;
	JAILDESC_LOCK(desc);
	if ((desc->jd_flags & JDF_REMOVED) != 0) {
		ready = POLLHUP;
	} else {
		/* Nothing to report yet; arrange to be woken on removal. */
		ready = 0;
		selrecord(td, &desc->jd_selinfo);
		desc->jd_flags |= JDF_SELECTED;
	}
	JAILDESC_UNLOCK(desc);
	return (ready);
}
/*
 * Detach a knote: take it off the note list of the jail descriptor
 * behind the knote's file.  The list is passed as not already locked.
 */
static void
jaildesc_kqops_detach(struct knote *kn)
{
	struct jaildesc *desc = kn->kn_fp->f_data;

	knlist_remove(&desc->jd_selinfo.si_note, kn, 0);
}
static int
jaildesc_kqops_event(struct knote *kn, long hint)
{
struct jaildesc *jd;
u_int event;
jd = kn->kn_fp->f_data;
if (hint == 0) {
/*
* Initial test after registration. Generate a
* NOTE_JAIL_REMOVE in case the prison already died
* before registration.
*/
event = jd->jd_flags & JDF_REMOVED ? NOTE_JAIL_REMOVE : 0;
} else {
/*
* Mask off extra data. In the NOTE_JAIL_CHILD case,
* that's everything except the NOTE_JAIL_CHILD bit
* itself, since a JID is any positive integer.
*/
event = ((u_int)hint & NOTE_JAIL_CHILD) ? NOTE_JAIL_CHILD :
(u_int)hint & NOTE_JAIL_CTRLMASK;
}
/* If the user is interested in this event, record it. */
if (kn->kn_sfflags & event) {
kn->kn_fflags |= event;
/* Report the created jail id or attached process id. */
if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
if (kn->kn_data != 0)
kn->kn_fflags |= NOTE_JAIL_MULTI;
kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
(u_int)hint & ~event;
}
}
/* Prison is gone, so flag the event as finished. */
if (event == NOTE_JAIL_REMOVE) {
kn->kn_flags |= EV_EOF | EV_ONESHOT;
if (kn->kn_fflags == 0)
kn->kn_flags |= EV_DROP;
return (1);
}
return (kn->kn_fflags != 0);
}
/*
 * Filter operations installed by jaildesc_kqfilter() on knotes that
 * use the EVFILT_JAILDESC filter.  No attach hook is set here; the
 * knote is added to the descriptor's note list in jaildesc_kqfilter().
 */
static const struct filterops jaildesc_kqops = {
.f_isfd = 1,
.f_detach = jaildesc_kqops_detach,
.f_event = jaildesc_kqops_event,
};
/*
 * kqueue registration hook for jail descriptors.
 *
 * Only EVFILT_JAILDESC is accepted; any other filter gets EINVAL.  On
 * success the knote is given the jaildesc filter operations, forced to
 * EV_CLEAR, and added to the descriptor's note list.
 */
static int
jaildesc_kqfilter(struct file *fp, struct knote *kn)
{
	struct jaildesc *desc;

	if (kn->kn_filter != EVFILT_JAILDESC)
		return (EINVAL);

	desc = fp->f_data;
	kn->kn_fop = &jaildesc_kqops;
	kn->kn_flags |= EV_CLEAR;
	knlist_add(&desc->jd_selinfo.si_note, kn, 0);
	return (0);
}
static int
jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
{
+3 -2
View File
@@ -46,7 +46,8 @@
#define EVFILT_SENDFILE (-12) /* attached to sendfile requests */
#define EVFILT_EMPTY (-13) /* empty send socket buf */
#define EVFILT_JAIL (-14) /* attached to struct prison */
#define EVFILT_SYSCOUNT 14
#define EVFILT_JAILDESC (-15) /* attached to jail descriptors */
#define EVFILT_SYSCOUNT 15
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define EV_SET(kevp_, a, b, c, d, e, f) do { \
@@ -210,7 +211,7 @@ struct freebsd11_kevent32 {
#define NOTE_TRACKERR 0x00000002 /* could not track child */
#define NOTE_CHILD 0x00000004 /* am a child process */
/* data/hint flags for EVFILT_JAIL */
/* data/hint flags for EVFILT_JAIL and EVFILT_JAILDESC */
#define NOTE_JAIL_CHILD 0x80000000 /* child jail was created */
#define NOTE_JAIL_SET 0x40000000 /* jail was modified */
#define NOTE_JAIL_ATTACH 0x20000000 /* jail was attached to */
+4
View File
@@ -35,6 +35,7 @@
#ifdef _KERNEL
#include <sys/queue.h>
#include <sys/selinfo.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_types.h>
@@ -54,6 +55,7 @@ struct jaildesc {
LIST_ENTRY(jaildesc) jd_list; /* (d,p) this prison's descs */
struct prison *jd_prison; /* (d) the prison */
struct mtx jd_lock;
struct selinfo jd_selinfo; /* (d) event notification */
unsigned jd_flags; /* (d) JDF_* flags */
};
@@ -69,6 +71,7 @@ struct jaildesc {
/*
* Flags for the jd_flags field
*/
#define JDF_SELECTED 0x00000001 /* issue selwakeup() */
#define JDF_REMOVED 0x00000002 /* jail was removed */
#define JDF_OWNING 0x00000004 /* closing descriptor removes jail */
@@ -77,6 +80,7 @@ int jaildesc_find(struct thread *td, int fd, struct prison **prp,
int jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning);
void jaildesc_set_prison(struct file *jd, struct prison *pr);
void jaildesc_prison_cleanup(struct prison *pr);
void jaildesc_knote(struct prison *pr, long hint);
#endif /* _KERNEL */