diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc index ddc157ee1b5..fe5061d97f3 100644 --- a/lib/libc/sys/Makefile.inc +++ b/lib/libc/sys/Makefile.inc @@ -96,7 +96,7 @@ MAN+= abort2.2 accept.2 access.2 acct.2 adjtime.2 \ mq_setattr.2 \ msgctl.2 msgget.2 msgrcv.2 msgsnd.2 \ msync.2 munmap.2 nanosleep.2 nfssvc.2 ntp_adjtime.2 open.2 \ - pathconf.2 pipe.2 poll.2 posix_fallocate.2 posix_openpt.2 profil.2 \ + pathconf.2 pdfork.2 pipe.2 poll.2 posix_fallocate.2 posix_openpt.2 profil.2 \ pselect.2 ptrace.2 quotactl.2 \ read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \ rtprio.2 @@ -178,6 +178,9 @@ MLINKS+=ntp_adjtime.2 ntp_gettime.2 MLINKS+=open.2 openat.2 MLINKS+=pathconf.2 fpathconf.2 MLINKS+=pathconf.2 lpathconf.2 +MLINKS+=pdfork.2 pdgetpid.2\ + pdfork.2 pdkill.2 \ + pdfork.2 pdwait4.2 MLINKS+=read.2 pread.2 read.2 preadv.2 read.2 readv.2 MLINKS+=readlink.2 readlinkat.2 MLINKS+=recv.2 recvfrom.2 recv.2 recvmsg.2 diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map index 547a2cff972..095751a441c 100644 --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -366,6 +366,9 @@ FBSD_1.2 { cap_new; cap_getrights; getloginclass; + pdfork; + pdgetpid; + pdkill; posix_fallocate; rctl_get_racct; rctl_get_rules; diff --git a/lib/libc/sys/cap_new.2 b/lib/libc/sys/cap_new.2 index 7710e12abdf..206715e84b4 100644 --- a/lib/libc/sys/cap_new.2 +++ b/lib/libc/sys/cap_new.2 @@ -260,7 +260,7 @@ Permit .Xr pdkill 2 . .It Dv CAP_PDWAIT Permit -.Xr pdwait 2 . +.Xr pdwait4 2 . .It Dv CAP_PEELOFF Permit .Xr sctp_peeloff 2 . @@ -429,7 +429,7 @@ argument is not a capability. .Xr openat 2 , .Xr pdgetpid 2 , .Xr pdkill 2 , -.Xr pdwait 2 , +.Xr pdwait4 2 , .Xr pipe 2 , .Xr poll 2 , .Xr pread 2 , diff --git a/lib/libc/sys/pdfork.2 b/lib/libc/sys/pdfork.2 new file mode 100644 index 00000000000..3f36e881ec9 --- /dev/null +++ b/lib/libc/sys/pdfork.2 @@ -0,0 +1,182 @@ +.\" +.\" Copyright (c) 2009-2010 Robert N. M. Watson +.\" All rights reserved. 
+.\" +.\" This software was developed at the University of Cambridge Computer +.\" Laboratory with support from a grant from Google, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.\" +.Dd August 16, 2011 +.Dt PDFORK 2 +.Os +.Sh NAME +.Nm pdfork , +.Nm pdgetpid , +.Nm pdkill , +.Nm pdwait4 +.Nd System calls to manage process descriptors +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/procdesc.h +.Ft int +.Fn pdfork "int *fdp" "int flags" +.Ft int +.Fn pdgetpid "int fd" "pid_t *pidp" +.Ft int +.Fn pdkill "int fd" "int signum" +.Ft int +.Fn pdwait4 "int fd" "int *status" "int options" "struct rusage *rusage" +.Sh DESCRIPTION +Process descriptors are special file descriptors that represent processes, +and are created using +.Fn pdfork , +a variant of +.Xr fork 2 , +which, if successful, returns a process descriptor in the integer pointed to +by +.Fa fdp . +Processes created via +.Fn pdfork +will not cause +.Dv SIGCHLD +on termination. +.Fn pdfork +can accept the flags: +.Bl -tag -width ".Dv PD_DAEMON" +.It Dv PD_DAEMON +Instead of the default terminate-on-close behaviour, allow the process to +live until it is explicitly killed with +.Xr kill 2 . +.Pp +This option is not permitted in Capsicum capability mode (see +.Xr cap_enter 2 ) . +.El +.Pp +.Fn pdgetpid +queries the process ID (PID) of the process descriptor +.Fa fd . +.Pp +.Fn pdkill +is functionally identical to +.Xr kill 2 , +except that it accepts a process descriptor, +.Fa fd , +rather than a PID. +.Pp +.Fn pdwait4 +behaves identically to +.Xr wait4 2 , +but operates with respect to a process descriptor argument rather than a PID. +.Pp +The following system calls also have effects specific to process descriptors: +.Pp +.Xr fstat 2 +queries status of a process descriptor; currently only the +.Fa st_mode , +.Fa st_birthtime , +.Fa st_atime , +.Fa st_ctime +and +.Fa st_mtime +fields are defined. If the owner read, write, and execute bits are set then the +process represented by the process descriptor is still alive. 
+.Pp +.Xr poll 2 +and +.Xr select 2 +allow waiting for process state transitions; currently only +.Dv POLLHUP +is defined, and will be raised when the process dies. +.Pp +.Xr close 2 +will close the process descriptor; unless +.Dv PD_DAEMON +is set, if the process is still alive and this is +the last reference to the process descriptor, the process will be terminated +with the signal +.Dv SIGKILL . +.Sh RETURN VALUES +.Fn pdfork +returns a PID, 0 or -1, as +.Xr fork 2 +does. +.Pp +.Fn pdgetpid +and +.Fn pdkill +return 0 on success and -1 on failure. +.Pp +.Fn pdwait4 +returns a PID on success and -1 on failure. +.Sh ERRORS +These functions may return the same error numbers as their PID-based equivalents +(e.g. +.Fn pdfork +may return the same error numbers as +.Xr fork 2 ) , +with the following additions: +.Bl -tag -width Er +.It Bq Er EINVAL +The signal number given to +.Fn pdkill +is invalid. +.It Bq Er ENOTCAPABLE +The process descriptor being operated on has insufficient rights (e.g. +.Dv CAP_PDKILL +for +.Fn pdkill ) . +.El +.Sh SEE ALSO +.Xr close 2 , +.Xr fork 2 , +.Xr fstat 2 , +.Xr kill 2 , +.Xr poll 2 , +.Xr wait4 2 +.Sh HISTORY +The +.Fn pdfork , +.Fn pdgetpid , +.Fn pdkill +and +.Fn pdwait4 +system calls first appeared in +.Fx 9.0 . +.Pp +Support for process descriptors was developed as part of the +.Tn TrustedBSD +Project. +.Sh AUTHORS +.An -nosplit +These functions and the capability facility were created by +.An "Robert N. M. Watson" Aq rwatson@FreeBSD.org +and +.An "Jonathan Anderson" Aq jonathan@FreeBSD.org +at the University of Cambridge Computer Laboratory with support from a grant +from Google, Inc. +.Sh BUGS +.Fn pdwait4 +has not yet been implemented. 
diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c index bf1d45c9ea3..5d2ce5bdb0c 100644 --- a/sys/compat/linux/linux_fork.c +++ b/sys/compat/linux/linux_fork.c @@ -64,7 +64,8 @@ linux_fork(struct thread *td, struct linux_fork_args *args) printf(ARGS(fork, "")); #endif - if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2)) != 0) + if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2, NULL, 0)) + != 0) return (error); td->td_retval[0] = p2->p_pid; @@ -100,7 +101,8 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args) #endif /* Exclude RFPPWAIT */ - if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2)) != 0) + if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2, + NULL, 0)) != 0) return (error); td->td_retval[0] = p2->p_pid; @@ -190,7 +192,7 @@ linux_clone(struct thread *td, struct linux_clone_args *args) if (args->parent_tidptr == NULL) return (EINVAL); - error = fork1(td, ff, 0, &p2); + error = fork1(td, ff, 0, &p2, NULL, 0); if (error) return (error); diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 4a9ec35d038..59f02c812a1 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1159,6 +1159,9 @@ options MAC_TEST options CAPABILITIES # fine-grained rights on file descriptors options CAPABILITY_MODE # sandboxes with no global namespace access +# Support for process descriptors +options PROCDESC + ##################################################################### # CLOCK OPTIONS diff --git a/sys/conf/files b/sys/conf/files index 0dc814e0934..5c5d92d6b16 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2412,6 +2412,7 @@ kern/subr_witness.c optional witness kern/sys_capability.c standard kern/sys_generic.c standard kern/sys_pipe.c standard +kern/sys_procdesc.c standard kern/sys_process.c standard kern/sys_socket.c standard kern/syscalls.c standard diff --git a/sys/conf/options b/sys/conf/options index f7026c13417..27fdbedacb6 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -149,6 
+149,7 @@ PPC_DEBUG opt_ppc.h PPC_PROBE_CHIPSET opt_ppc.h PPS_SYNC opt_ntp.h PREEMPTION opt_sched.h +PROCDESC opt_procdesc.h QUOTA SCHED_4BSD opt_sched.h SCHED_STATS opt_sched.h diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf index 004c2ddf207..4a62643623e 100644 --- a/sys/kern/capabilities.conf +++ b/sys/kern/capabilities.conf @@ -475,7 +475,7 @@ openbsd_poll pdfork pdgetpid pdkill -pdwait4 +#pdwait4 # not yet implemented ## ## Allow pipe(2). diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index be5c26fec02..fc072457c19 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -790,7 +790,8 @@ create_init(const void *udata __unused) struct ucred *newcred, *oldcred; int error; - error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc); + error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc, + NULL, 0); if (error) panic("cannot fork init: %d\n", error); KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 85f866c39c0..4aaed1f040e 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_ktrace.h" +#include "opt_procdesc.h" #include #include @@ -65,6 +66,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -120,6 +122,8 @@ static int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif); static int fill_socket_info(struct socket *so, struct kinfo_file *kif); static int fill_pts_info(struct tty *tp, struct kinfo_file *kif); static int fill_pipe_info(struct pipe *pi, struct kinfo_file *kif); +static int fill_procdesc_info(struct procdesc *pdp, + struct kinfo_file *kif); /* * A process is initially started out with NDFILE descriptors stored within @@ -3056,6 +3060,12 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) tp = fp->f_data; break; +#ifdef PROCDESC + case 
DTYPE_PROCDESC: + kif->kf_type = KF_TYPE_PROCDESC; + break; +#endif + default: kif->kf_type = KF_TYPE_UNKNOWN; break; @@ -3218,6 +3228,9 @@ export_fd_for_sysctl(void *data, int type, int fd, int fflags, int refcnt, case KF_TYPE_PTS: error = fill_pts_info((struct tty *)data, kif); break; + case KF_TYPE_PROCDESC: + error = fill_procdesc_info((struct procdesc *)data, kif); + break; default: error = 0; } @@ -3391,6 +3404,13 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) data = fp->f_data; break; +#ifdef PROCDESC + case DTYPE_PROCDESC: + type = KF_TYPE_PROCDESC; + data = fp->f_data; + break; +#endif + default: type = KF_TYPE_UNKNOWN; break; @@ -3586,6 +3606,16 @@ fill_pipe_info(struct pipe *pi, struct kinfo_file *kif) return (0); } +static int +fill_procdesc_info(struct procdesc *pdp, struct kinfo_file *kif) +{ + + if (pdp == NULL) + return (1); + kif->kf_un.kf_proc.kf_pid = pdp->pd_pid; + return (0); +} + static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD, sysctl_kern_proc_filedesc, "Process filedesc entries"); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 30b94b6a28a..e5d60942f7c 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -40,16 +40,19 @@ __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_kdtrace.h" #include "opt_ktrace.h" +#include "opt_procdesc.h" #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -461,39 +464,54 @@ exit1(struct thread *td, int rv) knlist_clear(&p->p_klist, 1); /* - * Notify parent that we're gone. If parent has the PS_NOCLDWAIT - * flag set, or if the handler is set to SIG_IGN, notify process - * 1 instead (and hope it will handle this situation). + * If this is a process with a descriptor, we may not need to deliver + * a signal to the parent. proctree_lock is held over + * procdesc_exit() to serialize concurrent calls to close() and + * exit(). 
*/ - PROC_LOCK(p->p_pptr); - mtx_lock(&p->p_pptr->p_sigacts->ps_mtx); - if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) { - struct proc *pp; - - mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); - pp = p->p_pptr; - PROC_UNLOCK(pp); - proc_reparent(p, initproc); - p->p_sigparent = SIGCHLD; - PROC_LOCK(p->p_pptr); - +#ifdef PROCDESC + if (p->p_procdesc == NULL || procdesc_exit(p)) { +#endif /* - * Notify parent, so in case he was wait(2)ing or - * executing waitpid(2) with our pid, he will - * continue. + * Notify parent that we're gone. If parent has the + * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN, + * notify process 1 instead (and hope it will handle this + * situation). */ - wakeup(pp); - } else - mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); + PROC_LOCK(p->p_pptr); + mtx_lock(&p->p_pptr->p_sigacts->ps_mtx); + if (p->p_pptr->p_sigacts->ps_flag & + (PS_NOCLDWAIT | PS_CLDSIGIGN)) { + struct proc *pp; - if (p->p_pptr == initproc) - psignal(p->p_pptr, SIGCHLD); - else if (p->p_sigparent != 0) { - if (p->p_sigparent == SIGCHLD) - childproc_exited(p); - else /* LINUX thread */ - psignal(p->p_pptr, p->p_sigparent); - } + mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); + pp = p->p_pptr; + PROC_UNLOCK(pp); + proc_reparent(p, initproc); + p->p_sigparent = SIGCHLD; + PROC_LOCK(p->p_pptr); + + /* + * Notify parent, so in case he was wait(2)ing or + * executing waitpid(2) with our pid, he will + * continue. + */ + wakeup(pp); + } else + mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); + + if (p->p_pptr == initproc) + psignal(p->p_pptr, SIGCHLD); + else if (p->p_sigparent != 0) { + if (p->p_sigparent == SIGCHLD) + childproc_exited(p); + else /* LINUX thread */ + psignal(p->p_pptr, p->p_sigparent); + } +#ifdef PROCDESC + } else + PROC_LOCK(p->p_pptr); +#endif sx_xunlock(&proctree_lock); /* @@ -660,7 +678,7 @@ wait4(struct thread *td, struct wait_args *uap) * rusage. Asserts and will release both the proctree_lock and the process * lock as part of its work. 
+ */ -static void +void proc_reap(struct thread *td, struct proc *p, int *status, int options, struct rusage *rusage) { @@ -722,6 +740,10 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options, sx_xunlock(&allproc_lock); LIST_REMOVE(p, p_sibling); leavepgrp(p); +#ifdef PROCDESC + if (p->p_procdesc != NULL) + procdesc_reap(p); +#endif sx_xunlock(&proctree_lock); /* diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 9d3e22d224c..32d00550a81 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -40,11 +40,13 @@ __FBSDID("$FreeBSD$"); #include "opt_kdtrace.h" #include "opt_ktrace.h" #include "opt_kstack_pages.h" +#include "opt_procdesc.h" #include #include #include #include +#include #include #include #include @@ -55,6 +57,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -104,7 +107,7 @@ fork(struct thread *td, struct fork_args *uap) int error; struct proc *p2; - error = fork1(td, RFFDG | RFPROC, 0, &p2); + error = fork1(td, RFFDG | RFPROC, 0, &p2, NULL, 0); if (error == 0) { td->td_retval[0] = p2->p_pid; td->td_retval[1] = 0; @@ -112,6 +115,34 @@ fork(struct thread *td, struct fork_args *uap) return (error); } +/* ARGSUSED */ +int +pdfork(td, uap) + struct thread *td; + struct pdfork_args *uap; +{ +#ifdef PROCDESC + int error, fd; + struct proc *p2; + + /* + * It is necessary to return fd by reference because 0 is a valid file + * descriptor number, and the child needs to be able to distinguish + * itself from the parent using the return value. 
+ */ + error = fork1(td, RFFDG | RFPROC | RFPROCDESC, 0, &p2, + &fd, uap->flags); + if (error == 0) { + td->td_retval[0] = p2->p_pid; + td->td_retval[1] = 0; + error = copyout(&fd, uap->fdp, sizeof(fd)); + } + return (error); +#else + return (ENOSYS); +#endif +} + /* ARGSUSED */ int vfork(struct thread *td, struct vfork_args *uap) @@ -124,7 +155,7 @@ vfork(struct thread *td, struct vfork_args *uap) #else flags = RFFDG | RFPROC | RFPPWAIT | RFMEM; #endif - error = fork1(td, flags, 0, &p2); + error = fork1(td, flags, 0, &p2, NULL, 0); if (error == 0) { td->td_retval[0] = p2->p_pid; td->td_retval[1] = 0; @@ -143,7 +174,7 @@ rfork(struct thread *td, struct rfork_args *uap) return (EINVAL); AUDIT_ARG_FFLAGS(uap->flags); - error = fork1(td, uap->flags, 0, &p2); + error = fork1(td, uap->flags, 0, &p2, NULL, 0); if (error == 0) { td->td_retval[0] = p2 ? p2->p_pid : 0; td->td_retval[1] = 0; @@ -337,7 +368,7 @@ fork_norfproc(struct thread *td, int flags) static void do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, - struct vmspace *vm2) + struct vmspace *vm2, int pdflags) { struct proc *p1, *pptr; int p2_held, trypid; @@ -625,6 +656,16 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, p2->p_vmspace->vm_ssize); } +#ifdef PROCDESC + /* + * Associate the process descriptor with the process before anything + * can happen that might cause that process to need the descriptor. + * However, don't do this until after fork(2) can no longer fail. + */ + if (flags & RFPROCDESC) + procdesc_new(p2, pdflags); +#endif + /* * Both processes are set up, now check if any loadable modules want * to adjust anything. 
@@ -710,7 +751,8 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, } int -fork1(struct thread *td, int flags, int pages, struct proc **procp) +fork1(struct thread *td, int flags, int pages, struct proc **procp, + int *procdescp, int pdflags) { struct proc *p1; struct proc *newproc; @@ -721,6 +763,9 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp) int error; static int curfail; static struct timeval lastfail; +#ifdef PROCDESC + struct file *fp_procdesc = NULL; +#endif /* Check for the undefined or unimplemented flags. */ if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0) @@ -738,6 +783,18 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp) if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG) return (EINVAL); +#ifdef PROCDESC + if ((flags & RFPROCDESC) != 0) { + /* Can't not create a process yet get a process descriptor. */ + if ((flags & RFPROC) == 0) + return (EINVAL); + + /* Must provide a place to put a procdesc if creating one. */ + if (procdescp == NULL) + return (EINVAL); + } +#endif + p1 = td->td_proc; /* @@ -757,6 +814,25 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp) return (EAGAIN); #endif +#ifdef PROCDESC + /* + * If required, create a process descriptor in the parent first; we + * will abandon it if something goes wrong. We don't finit() until + * later. + */ + if (flags & RFPROCDESC) { + error = falloc(td, &fp_procdesc, procdescp, 0); + if (error != 0) { +#ifdef RACCT + PROC_LOCK(p1); + racct_sub(p1, RACCT_NPROC, 1); + PROC_UNLOCK(p1); +#endif + return (error); + } + } +#endif + mem_charged = 0; vm2 = NULL; if (pages == 0) @@ -868,12 +944,16 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp) PROC_UNLOCK(p1); } if (ok) { - do_fork(td, flags, newproc, td2, vm2); + do_fork(td, flags, newproc, td2, vm2, pdflags); /* * Return child proc pointer to parent. 
*/ *procp = newproc; +#ifdef PROCDESC + if (flags & RFPROCDESC) + procdesc_finit(newproc->p_procdesc, fp_procdesc); +#endif return (0); } @@ -892,6 +972,10 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp) if (vm2 != NULL) vmspace_free(vm2); uma_zfree(proc_zone, newproc); +#ifdef PROCDESC + if (((flags & RFPROCDESC) != 0) && (fp_procdesc != NULL)) + fdrop(fp_procdesc, td); +#endif pause("fork", hz / 2); #ifdef RACCT PROC_LOCK(p1); diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index 95f896fa5aa..bb1246980d5 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -88,7 +88,7 @@ kproc_create(void (*func)(void *), void *arg, panic("kproc_create called too soon"); error = fork1(&thread0, RFMEM | RFFDG | RFPROC | RFSTOPPED | flags, - pages, &p2); + pages, &p2, NULL, 0); if (error) return error; diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index e1861eb1e81..26ef0d7f31d 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -41,12 +41,14 @@ __FBSDID("$FreeBSD$"); #include "opt_kdtrace.h" #include "opt_ktrace.h" #include "opt_core.h" +#include "opt_procdesc.h" #include #include #include #include #include +#include #include #include #include @@ -59,6 +61,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -1698,6 +1701,34 @@ kill(struct thread *td, struct kill_args *uap) /* NOTREACHED */ } +int +pdkill(td, uap) + struct thread *td; + struct pdkill_args *uap; +{ +#ifdef PROCDESC + struct proc *p; + int error; + + AUDIT_ARG_SIGNUM(uap->signum); + AUDIT_ARG_FD(uap->fd); + if ((u_int)uap->signum > _SIG_MAXSIG) + return (EINVAL); + + error = procdesc_find(td, uap->fd, CAP_PDKILL, &p); + if (error) + return (error); + AUDIT_ARG_PROCESS(p); + error = p_cansignal(td, p, uap->signum); + if (error == 0 && uap->signum) + psignal(p, uap->signum); + PROC_UNLOCK(p); + return (error); +#else + return (ENOSYS); +#endif +} + #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct 
okillpg_args { diff --git a/sys/kern/sys_procdesc.c b/sys/kern/sys_procdesc.c new file mode 100644 index 00000000000..9993732527c --- /dev/null +++ b/sys/kern/sys_procdesc.c @@ -0,0 +1,524 @@ +/*- + * Copyright (c) 2009 Robert N. M. Watson + * All rights reserved. + * + * This software was developed at the University of Cambridge Computer + * Laboratory with support from a grant from Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*- + * FreeBSD process descriptor facility. + * + * Some processes are represented by a file descriptor, which will be used in + * preference to signaling and pids for the purposes of process management, + * and is, in effect, a form of capability. 
When a process descriptor is + * used with a process, it ceases to be visible to certain traditional UNIX + * process facilities, such as waitpid(2). + * + * Some semantics: + * + * - At most one process descriptor will exist for any process, although + * references to that descriptor may be held from many processes (or even + * be in flight between processes over a local domain socket). + * - Last close on the process descriptor will terminate the process using + * SIGKILL and reparent it to init so that there's a process to reap it + * when it's done exiting. + * - If the process exits before the descriptor is closed, it will not + * generate SIGCHLD on termination, or be picked up by waitpid(). + * - The pdkill(2) system call may be used to deliver a signal to the process + * using its process descriptor. + * - The pdwait4(2) system call may be used to block (or not) on a process + * descriptor to collect termination information. + * + * Open questions: + * + * - How to handle ptrace(2)? + * - Will we want to add a pidtoprocdesc(2) system call to allow process + * descriptors to be created for processes without pdfork(2)? 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_procdesc.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#ifdef PROCDESC + +FEATURE(process_descriptors, "Process Descriptors"); + +static uma_zone_t procdesc_zone; + +static fo_rdwr_t procdesc_read; +static fo_rdwr_t procdesc_write; +static fo_truncate_t procdesc_truncate; +static fo_ioctl_t procdesc_ioctl; +static fo_poll_t procdesc_poll; +static fo_kqfilter_t procdesc_kqfilter; +static fo_stat_t procdesc_stat; +static fo_close_t procdesc_close; +static fo_chmod_t procdesc_chmod; +static fo_chown_t procdesc_chown; + +static struct fileops procdesc_ops = { + .fo_read = procdesc_read, + .fo_write = procdesc_write, + .fo_truncate = procdesc_truncate, + .fo_ioctl = procdesc_ioctl, + .fo_poll = procdesc_poll, + .fo_kqfilter = procdesc_kqfilter, + .fo_stat = procdesc_stat, + .fo_close = procdesc_close, + .fo_chmod = procdesc_chmod, + .fo_chown = procdesc_chown, + .fo_flags = DFLAG_PASSABLE, +}; + +/* + * Initialize with VFS so that process descriptors are available along with + * other file descriptor types. As long as it runs before init(8) starts, + * there shouldn't be a problem. + */ +static void +procdesc_init(void *dummy __unused) +{ + + procdesc_zone = uma_zcreate("procdesc", sizeof(struct procdesc), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + if (procdesc_zone == NULL) + panic("procdesc_init: procdesc_zone not initialized"); +} +SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, procdesc_init, NULL); + +/* + * Return a locked process given a process descriptor, or ESRCH if it has + * died. 
+ */ +int +procdesc_find(struct thread *td, int fd, cap_rights_t rights, + struct proc **p) +{ + struct procdesc *pd; + struct file *fp; + int error; + + error = fget(td, fd, rights, &fp); + if (error) + return (error); + if (fp->f_type != DTYPE_PROCDESC) { + error = EBADF; + goto out; + } + pd = fp->f_data; + sx_slock(&proctree_lock); + if (pd->pd_proc != NULL) { + *p = pd->pd_proc; + PROC_LOCK(*p); + } else + error = ESRCH; + sx_sunlock(&proctree_lock); +out: + fdrop(fp, td); + return (error); +} + +/* + * Function to be used by procstat(1) sysctls when returning procdesc + * information. + */ +pid_t +procdesc_pid(struct file *fp_procdesc) +{ + struct procdesc *pd; + + KASSERT(fp_procdesc->f_type == DTYPE_PROCDESC, + ("procdesc_pid: !procdesc")); + + pd = fp_procdesc->f_data; + return (pd->pd_pid); +} + +/* + * Retrieve the PID associated with a process descriptor. + */ +int +kern_pdgetpid(struct thread *td, int fd, cap_rights_t rights, pid_t *pidp) +{ + struct file *fp; + int error; + + error = fget(td, fd, rights, &fp); + if (error) + return (error); + if (fp->f_type != DTYPE_PROCDESC) { + error = EBADF; + goto out; + } + *pidp = procdesc_pid(fp); +out: + fdrop(fp, td); + return (error); +} + +/* + * System call to return the pid of a process given its process descriptor. + */ +int +pdgetpid(struct thread *td, struct pdgetpid_args *uap) +{ + pid_t pid; + int error; + + AUDIT_ARG_FD(uap->fd); + error = kern_pdgetpid(td, uap->fd, CAP_PDGETPID, &pid); + if (error == 0) + error = copyout(&pid, uap->pidp, sizeof(pid)); + return (error); +} + +/* + * When a new process is forked by pdfork(), a file descriptor is allocated + * by the fork code first, then the process is forked, and then we get a + * chance to set up the process descriptor. Failure is not permitted at this + * point, so procdesc_new() must succeed. 
+ */ +void +procdesc_new(struct proc *p, int flags) +{ + struct procdesc *pd; + + pd = uma_zalloc(procdesc_zone, M_WAITOK | M_ZERO); + pd->pd_proc = p; + pd->pd_pid = p->p_pid; + p->p_procdesc = pd; + pd->pd_flags = 0; + if (flags & PD_DAEMON) + pd->pd_flags |= PDF_DAEMON; + PROCDESC_LOCK_INIT(pd); + + /* + * Process descriptors start out with two references: one from their + * struct file, and the other from their struct proc. + */ + refcount_init(&pd->pd_refcount, 2); +} + +/* + * Initialize a file with a process descriptor. + */ +void +procdesc_finit(struct procdesc *pdp, struct file *fp) +{ + + finit(fp, FREAD | FWRITE, DTYPE_PROCDESC, pdp, &procdesc_ops); +} + +static void +procdesc_free(struct procdesc *pd) +{ + + /* + * When the last reference is released, we assert that the descriptor + * has been closed, but not that the process has exited, as we will + * detach the descriptor before the process dies if the descriptor is + * closed, as we can't wait synchronously. + */ + if (refcount_release(&pd->pd_refcount)) { + KASSERT(pd->pd_proc == NULL, + ("procdesc_free: pd_proc != NULL")); + KASSERT((pd->pd_flags & PDF_CLOSED), + ("procdesc_free: !PDF_CLOSED")); + + PROCDESC_LOCK_DESTROY(pd); + uma_zfree(procdesc_zone, pd); + } +} + +/* + * procdesc_exit() - notify a process descriptor that its process is exiting. + * We use the proctree_lock to ensure that process exit either happens + * strictly before or strictly after a concurrent call to procdesc_close(). 
+ */ +int +procdesc_exit(struct proc *p) +{ + struct procdesc *pd; + + sx_assert(&proctree_lock, SA_XLOCKED); + PROC_LOCK_ASSERT(p, MA_OWNED); + KASSERT(p->p_procdesc != NULL, ("procdesc_exit: p_procdesc NULL")); + + pd = p->p_procdesc; + + PROCDESC_LOCK(pd); + KASSERT((pd->pd_flags & PDF_CLOSED) == 0 || p->p_pptr == initproc, + ("procdesc_exit: closed && parent not init")); + + pd->pd_flags |= PDF_EXITED; + + /* + * If the process descriptor has been closed, then we have nothing + * to do; return 1 so that init will get SIGCHLD and do the reaping. + * Clean up the procdesc now rather than letting it happen during + * that reap. + */ + if (pd->pd_flags & PDF_CLOSED) { + PROCDESC_UNLOCK(pd); + pd->pd_proc = NULL; + p->p_procdesc = NULL; + procdesc_free(pd); + return (1); + } + if (pd->pd_flags & PDF_SELECTED) { + pd->pd_flags &= ~PDF_SELECTED; + selwakeup(&pd->pd_selinfo); + } + PROCDESC_UNLOCK(pd); + return (0); +} + +/* + * When a process descriptor is reaped, perhaps as a result of close() or + * pdwait4(), release the process's reference on the process descriptor. + */ +void +procdesc_reap(struct proc *p) +{ + struct procdesc *pd; + + sx_assert(&proctree_lock, SA_XLOCKED); + KASSERT(p->p_procdesc != NULL, ("procdesc_reap: p_procdesc == NULL")); + + pd = p->p_procdesc; + pd->pd_proc = NULL; + procdesc_free(pd); +} + +/* + * procdesc_close() - last close on a process descriptor. If the process is + * still running, terminate with SIGKILL (unless PD_DAEMON is set) and let + * init(8) clean up the mess; if not, we have to clean up the zombie ourselves. 
+ */ +static int +procdesc_close(struct file *fp, struct thread *td) +{ + struct procdesc *pd; + struct proc *p; + + KASSERT(fp->f_type == DTYPE_PROCDESC, ("procdesc_close: !procdesc")); + + pd = fp->f_data; + fp->f_ops = &badfileops; + fp->f_data = NULL; + + sx_xlock(&proctree_lock); + PROCDESC_LOCK(pd); + pd->pd_flags |= PDF_CLOSED; + PROCDESC_UNLOCK(pd); + p = pd->pd_proc; + PROC_LOCK(p); + if (p->p_state == PRS_ZOMBIE) { + /* + * If the process is already dead and just awaiting reaping, + * do that now. This will release the process's reference to + * the process descriptor when it calls back into + * procdesc_reap(). + */ + PROC_SLOCK(p); + proc_reap(curthread, p, NULL, 0, NULL); + } else { + /* + * If the process is not yet dead, we need to kill it, but we + * can't wait around synchronously for it to go away, as that + * path leads to madness (and deadlocks). First, detach the + * process from its descriptor so that its exit status will + * be reported normally. + */ + pd->pd_proc = NULL; + p->p_procdesc = NULL; + procdesc_free(pd); + + /* + * Next, reparent it to init(8) so that there's someone to + * pick up the pieces; finally, terminate it unless PDF_DAEMON. + */ + p->p_sigparent = SIGCHLD; + proc_reparent(p, initproc); + if ((pd->pd_flags & PDF_DAEMON) == 0) + psignal(p, SIGKILL); + PROC_UNLOCK(p); + sx_xunlock(&proctree_lock); + } + + /* + * Release the file descriptor's reference on the process descriptor. 
+ */ + procdesc_free(pd); + return (0); +} + +static int +procdesc_read(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) +{ + + return (EOPNOTSUPP); +} + +static int +procdesc_write(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) +{ + + return (EOPNOTSUPP); +} + +static int +procdesc_truncate(struct file *fp, off_t length, struct ucred *active_cred, + struct thread *td) +{ + + return (EOPNOTSUPP); +} + +static int +procdesc_ioctl(struct file *fp, u_long com, void *data, + struct ucred *active_cred, struct thread *td) +{ + + return (EOPNOTSUPP); +} + +static int +procdesc_poll(struct file *fp, int events, struct ucred *active_cred, + struct thread *td) +{ + struct procdesc *pd; + int revents; + + revents = 0; + pd = fp->f_data; + PROCDESC_LOCK(pd); + if (pd->pd_flags & PDF_EXITED) + revents |= POLLHUP; + if (revents == 0) { + selrecord(td, &pd->pd_selinfo); + pd->pd_flags |= PDF_SELECTED; + } + PROCDESC_UNLOCK(pd); + return (revents); +} + +static int +procdesc_kqfilter(struct file *fp, struct knote *kn) +{ + + return (EOPNOTSUPP); +} + +static int +procdesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, + struct thread *td) +{ + struct procdesc *pd; + struct timeval pstart; + + /* + * XXXRW: Perhaps we should cache some more information from the + * process so that we can return it reliably here even after it has + * died. For example, caching its credential data. + */ + bzero(sb, sizeof(*sb)); + pd = fp->f_data; + sx_slock(&proctree_lock); + if (pd->pd_proc != NULL) { + PROC_LOCK(pd->pd_proc); + + /* Set birth and [acm] times to process start time. 
*/ + pstart = pd->pd_proc->p_stats->p_start; + timevaladd(&pstart, &boottime); + TIMEVAL_TO_TIMESPEC(&pstart, &sb->st_birthtim); + sb->st_atim = sb->st_birthtim; + sb->st_ctim = sb->st_birthtim; + sb->st_mtim = sb->st_birthtim; + if (pd->pd_proc->p_state != PRS_ZOMBIE) + sb->st_mode = S_IFREG | S_IRWXU; + else + sb->st_mode = S_IFREG; + sb->st_uid = pd->pd_proc->p_ucred->cr_ruid; + sb->st_gid = pd->pd_proc->p_ucred->cr_rgid; + PROC_UNLOCK(pd->pd_proc); + } else + sb->st_mode = S_IFREG; + sx_sunlock(&proctree_lock); + return (0); +} + +static int +procdesc_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, + struct thread *td) +{ + + return (EOPNOTSUPP); +} + +static int +procdesc_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, + struct thread *td) +{ + + return (EOPNOTSUPP); +} + +#else /* !PROCDESC */ + +int +pdgetpid(struct thread *td, struct pdgetpid_args *uap) +{ + + return (ENOSYS); +} + +#endif /* PROCDESC */ diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 0b249a5b55a..b79c6c7109c 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -919,10 +919,10 @@ u_int64_t *rightsp); } 516 AUE_CAP_ENTER STD { int cap_enter(void); } 517 AUE_CAP_GETMODE STD { int cap_getmode(u_int *modep); } -518 AUE_PDFORK UNIMPL pdfork -519 AUE_PDKILL UNIMPL pdkill -520 AUE_PDGETPID UNIMPL pdgetpid -521 AUE_PDWAIT UNIMPL pdwait +518 AUE_PDFORK STD { int pdfork(int *fdp, int flags); } +519 AUE_PDKILL STD { int pdkill(int fd, int signum); } +520 AUE_PDGETPID STD { int pdgetpid(int fd, pid_t *pidp); } +521 AUE_PDWAIT UNIMPL pdwait4 522 AUE_SELECT STD { int pselect(int nd, fd_set *in, \ fd_set *ou, fd_set *ex, \ const struct timespec *ts, \ diff --git a/sys/sys/capability.h b/sys/sys/capability.h index d67dc179b0d..81446a28190 100644 --- a/sys/sys/capability.h +++ b/sys/sys/capability.h @@ -131,8 +131,13 @@ #define CAP_IOCTL 0x0004000000000000ULL #define CAP_TTYHOOK 0x0008000000000000ULL +/* Process management via 
process descriptors. */ +#define CAP_PDGETPID 0x0010000000000000ULL +#define CAP_PDWAIT 0x0020000000000000ULL +#define CAP_PDKILL 0x0040000000000000ULL + /* The mask of all valid method rights. */ -#define CAP_MASK_VALID 0x000fffffffffffffULL +#define CAP_MASK_VALID 0x007fffffffffffffULL #ifdef _KERNEL diff --git a/sys/sys/file.h b/sys/sys/file.h index 5a4af332ebb..57e7047e8b3 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -65,6 +65,7 @@ struct socket; #define DTYPE_PTS 10 /* pseudo teletype master device */ #define DTYPE_DEV 11 /* Device specific fd type */ #define DTYPE_CAPABILITY 12 /* capability */ +#define DTYPE_PROCDESC 13 /* process descriptor */ #ifdef _KERNEL diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 233efe98564..67adbe5b589 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -166,6 +166,7 @@ struct mqueue_notifier; struct nlminfo; struct p_sched; struct proc; +struct procdesc; struct racct; struct sleepqueue; struct td_sched; @@ -534,6 +535,7 @@ struct proc { int p_boundary_count;/* (c) Num threads at user boundary */ int p_pendingcnt; /* how many signals are pending */ struct itimers *p_itimers; /* (c) POSIX interval timers. */ + struct procdesc *p_procdesc; /* (e) Process descriptor, if any. */ /* End area that is zeroed on creation. 
*/ #define p_endzero p_magic @@ -822,7 +824,7 @@ int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, int enterthispgrp(struct proc *p, struct pgrp *pgrp); void faultin(struct proc *p); void fixjobc(struct proc *p, struct pgrp *pgrp, int entering); -int fork1(struct thread *, int, int, struct proc **); +int fork1(struct thread *, int, int, struct proc **, int *, int); void fork_exit(void (*)(void *, struct trapframe *), void *, struct trapframe *); void fork_return(struct thread *, struct trapframe *); @@ -844,6 +846,8 @@ void pargs_hold(struct pargs *pa); void procinit(void); void proc_linkup0(struct proc *p, struct thread *td); void proc_linkup(struct proc *p, struct thread *td); +void proc_reap(struct thread *td, struct proc *p, int *status, int options, + struct rusage *rusage); void proc_reparent(struct proc *child, struct proc *newparent); struct pstats *pstats_alloc(void); void pstats_fork(struct pstats *src, struct pstats *dst); diff --git a/sys/sys/procdesc.h b/sys/sys/procdesc.h new file mode 100644 index 00000000000..cc8b7166f63 --- /dev/null +++ b/sys/sys/procdesc.h @@ -0,0 +1,119 @@ +/*- + * Copyright (c) 2009 Robert N. M. Watson + * All rights reserved. + * + * This software was developed at the University of Cambridge Computer + * Laboratory with support from a grant from Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS_PROCDESC_H_ +#define _SYS_PROCDESC_H_ + +#ifdef _KERNEL +#include /* struct selinfo */ +#include +#include + +/*- + * struct procdesc describes a process descriptor, and essentially consists + * of two pointers -- one to the file descriptor, and one to the process. + * When both become NULL, the process descriptor will be freed. An important + * invariant is that there is only ever one process descriptor for a process, + * so a single file pointer will suffice. + * + * Locking key: + * (c) - Constant after initial setup. + * (p) - Protected by the process descriptor mutex. + * (r) - Atomic reference count. + * (s) - Protected by selinfo. + * (t) - Protected by the proctree_lock. + */ +struct proc; +struct sigio; +struct procdesc { + /* + * Basic process descriptor state: the process, a cache of its pid to + * satisfy queries after the process exits, and process descriptor + * refcount. + */ + struct proc *pd_proc; /* (t) Process. */ + pid_t pd_pid; /* (c) Cached pid. */ + u_int pd_refcount; /* (r) Reference count. */ + + /* + * In-flight data and notification of events. + */ + int pd_flags; /* (p) PDF_ flags.
*/ + struct selinfo pd_selinfo; /* (p) Event notification. */ + struct mtx pd_lock; /* Protect data + events. */ +}; + +/* + * Locking macros for the procdesc itself. + */ +#define PROCDESC_LOCK_DESTROY(pd) mtx_destroy(&(pd)->pd_lock) +#define PROCDESC_LOCK_INIT(pd) mtx_init(&(pd)->pd_lock, "procdesc", NULL, \ + MTX_DEF) +#define PROCDESC_LOCK(pd) mtx_lock(&(pd)->pd_lock) +#define PROCDESC_UNLOCK(pd) mtx_unlock(&(pd)->pd_lock) + +/* + * Flags for the pd_flags field. These are distinct from the PD_ flags + * passed to pdfork(2); the numeric values overlap (PD_DAEMON and + * PDF_CLOSED are both 0x00000001). + */ +#define PDF_CLOSED 0x00000001 /* Descriptor has closed. */ +#define PDF_SELECTED 0x00000002 /* Issue selwakeup(). */ +#define PDF_EXITED 0x00000004 /* Process exited. */ +#define PDF_DAEMON 0x00000008 /* Don't exit when procdesc closes. */ + +/* + * In-kernel interfaces to process descriptors. + */ +int procdesc_exit(struct proc *); +int procdesc_find(struct thread *, int fd, cap_rights_t, struct proc **); +int kern_pdgetpid(struct thread *, int fd, cap_rights_t, pid_t *pidp); +void procdesc_new(struct proc *, int); +void procdesc_finit(struct procdesc *, struct file *); +pid_t procdesc_pid(struct file *); +void procdesc_reap(struct proc *); + +#else /* !_KERNEL */ + +/* + * Process descriptor system calls. + */ +struct rusage; +int pdfork(int *, int); +int pdkill(int, int); +int pdgetpid(int, pid_t *); + +#endif /* _KERNEL */ + +/* + * Flags which can be passed to pdfork(2). + */ +#define PD_DAEMON 0x00000001 /* Don't exit when procdesc closes.
*/ + +#endif /* !_SYS_PROCDESC_H_ */ diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h index 9d56a3a4273..9e7f7e6d834 100644 --- a/sys/sys/unistd.h +++ b/sys/sys/unistd.h @@ -185,11 +185,12 @@ #define RFTSIGMASK 0xFF #define RFTSIGNUM(flags) (((flags) >> RFTSIGSHIFT) & RFTSIGMASK) #define RFTSIGFLAGS(signum) ((signum) << RFTSIGSHIFT) +#define RFPROCDESC (1<<28) /* return a process descriptor */ #define RFPPWAIT (1<<31) /* parent sleeps until child exits (vfork) */ -#define RFKERNELONLY (RFSTOPPED | RFHIGHPID | RFPPWAIT) #define RFFLAGS (RFFDG | RFPROC | RFMEM | RFNOWAIT | RFCFDG | \ RFTHREAD | RFSIGSHARE | RFLINUXTHPN | RFSTOPPED | RFHIGHPID | RFTSIGZMB | \ - RFPPWAIT) + RFPROCDESC | RFPPWAIT) +#define RFKERNELONLY (RFSTOPPED | RFHIGHPID | RFPPWAIT | RFPROCDESC) #endif /* __BSD_VISIBLE */ diff --git a/sys/sys/user.h b/sys/sys/user.h index ecf4ea94013..a139d4fdfab 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -252,6 +252,7 @@ struct user { #define KF_TYPE_SEM 9 #define KF_TYPE_PTS 10 /* no KF_TYPE_CAPABILITY (11), since capabilities wrap other file objects */ +#define KF_TYPE_PROCDESC 12 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 @@ -377,6 +378,9 @@ struct kinfo_file { /* Round to 64 bit alignment. */ uint32_t kf_pts_pad0[7]; } kf_pts; + struct { + pid_t kf_pid; + } kf_proc; } kf_un; uint16_t kf_status; /* Status flags. */ uint16_t kf_pad1; /* Round to 32 bit alignment. */