diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 4014006ce2e..73ac2c6efc4 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -818,8 +818,6 @@ soalloc(struct vnet *vnet) * a feature to change class of an existing lock, so we use DUPOK. */ mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK); - mtx_init(&so->so_snd_mtx, "so_snd", NULL, MTX_DEF); - mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF); so->so_rcv.sb_sel = &so->so_rdsel; so->so_snd.sb_sel = &so->so_wrsel; sx_init(&so->so_snd_sx, "so_snd_sx"); @@ -893,14 +891,47 @@ sodealloc(struct socket *so) &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); sx_destroy(&so->so_snd_sx); sx_destroy(&so->so_rcv_sx); - mtx_destroy(&so->so_snd_mtx); - mtx_destroy(&so->so_rcv_mtx); } crfree(so->so_cred); mtx_destroy(&so->so_lock); uma_zfree(socket_zone, so); } +/* + * Shim to accomodate protocols that already do their own socket buffers + * management (marked with PR_SOCKBUF) with protocols that yet do not. + * + * Attach via socket(2) is different from attach via accept(2). In case of + * normal socket(2) syscall it is the pr_attach that calls soreserve(), even + * for protocols that don't yet do PR_SOCKBUF. In case of accepted connection + * it is our shim that calls soreserve() and the hiwat values are taken from + * the parent socket. + */ +static int +soattach(struct socket *so, int proto, struct thread *td, struct socket *head) +{ + int error; + + VNET_ASSERT(curvnet == so->so_vnet, + ("%s: %p != %p", __func__, curvnet, so->so_vnet)); + + if ((so->so_proto->pr_flags & PR_SOCKBUF) == 0) { + mtx_init(&so->so_snd_mtx, "so_snd", NULL, MTX_DEF); + mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF); + so->so_snd.sb_mtx = &so->so_snd_mtx; + so->so_rcv.sb_mtx = &so->so_rcv_mtx; + } + if (head == NULL || (error = soreserve(so, head->sol_sbsnd_hiwat, + head->sol_sbrcv_hiwat)) == 0) + error = so->so_proto->pr_attach(so, proto, td); + if (error != 0 && (so->so_proto->pr_flags & PR_SOCKBUF) == 0) { + mtx_destroy(&so->so_snd_mtx); + mtx_destroy(&so->so_rcv_mtx); + } + + return (error); +} + /* * socreate returns a socket with a ref count of 1 and a file descriptor * reference. The socket should be closed with soclose(). @@ -956,16 +987,8 @@ socreate(int dom, struct socket **aso, int type, int proto, so_rdknl_assert_lock); knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock, so_wrknl_assert_lock); - if ((prp->pr_flags & PR_SOCKBUF) == 0) { - so->so_snd.sb_mtx = &so->so_snd_mtx; - so->so_rcv.sb_mtx = &so->so_rcv_mtx; - } - /* - * Auto-sizing of socket buffers is managed by the protocols and - * the appropriate flags must be set in the pr_attach() method. - */ CURVNET_SET(so->so_vnet); - error = prp->pr_attach(so, proto, td); + error = soattach(so, proto, td, NULL); CURVNET_RESTORE(); if (error) { sodealloc(so); @@ -1192,13 +1215,6 @@ solisten_clone(struct socket *head) so_rdknl_assert_lock); knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock, so_wrknl_assert_lock); - VNET_SO_ASSERT(head); - if (soreserve(so, head->sol_sbsnd_hiwat, head->sol_sbrcv_hiwat)) { - sodealloc(so); - log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n", - __func__, head->so_pcb); - return (NULL); - } so->so_rcv.sb_lowat = head->sol_sbrcv_lowat; so->so_snd.sb_lowat = head->sol_sbsnd_lowat; so->so_rcv.sb_timeo = head->sol_sbrcv_timeo; @@ -1206,10 +1222,6 @@ solisten_clone(struct socket *head) so->so_rcv.sb_flags = head->sol_sbrcv_flags & SB_AUTOSIZE; so->so_snd.sb_flags = head->sol_sbsnd_flags & (SB_AUTOSIZE | SB_AUTOLOWAT); - if ((so->so_proto->pr_flags & PR_SOCKBUF) == 0) { - so->so_snd.sb_mtx = &so->so_snd_mtx; - so->so_rcv.sb_mtx = &so->so_rcv_mtx; - } return (so); } @@ -1223,7 +1235,7 @@ sonewconn(struct socket *head, int connstatus) if ((so = solisten_clone(head)) == NULL) return (NULL); - if (so->so_proto->pr_attach(so, 0, NULL) != 0) { + if (soattach(so, 0, NULL, head) != 0) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: pr_attach() failed\n", __func__, head->so_pcb); @@ -1327,14 +1339,7 @@ sopeeloff(struct socket *head) so_rdknl_assert_lock); knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock, so_wrknl_assert_lock); - VNET_SO_ASSERT(head); - if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) { - sodealloc(so); - log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n", - __func__, head->so_pcb); - return (NULL); - } - if (so->so_proto->pr_attach(so, 0, NULL)) { + if (soattach(so, 0, NULL, head)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: pr_attach() failed\n", __func__, head->so_pcb); @@ -1346,10 +1351,6 @@ sopeeloff(struct socket *head) so->so_snd.sb_timeo = head->so_snd.sb_timeo; so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE; so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; - if ((so->so_proto->pr_flags & PR_SOCKBUF) == 0) { - so->so_snd.sb_mtx = &so->so_snd_mtx; - so->so_rcv.sb_mtx = &so->so_rcv_mtx; - } soref(so); @@ -1906,6 +1907,8 @@ sofree(struct socket *so) SOCK_SENDBUF_UNLOCK(so); SOCK_RECVBUF_UNLOCK(so); #endif + mtx_destroy(&so->so_snd_mtx); + mtx_destroy(&so->so_rcv_mtx); } seldrain(&so->so_rdsel); seldrain(&so->so_wrsel); diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 60736af5adf..d56aac883d9 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -479,7 +479,7 @@ uipc_attach(struct socket *so, int proto, struct thread *td) { u_long sendspace, recvspace; struct unpcb *unp; - int error; + int error, rcvmtxopts; bool locked; KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL")); @@ -494,6 +494,7 @@ uipc_attach(struct socket *so, int proto, struct thread *td) * limits to unpdg_recvspace. */ sendspace = recvspace = unpdg_recvspace; + rcvmtxopts = 0; break; case SOCK_STREAM: @@ -505,14 +506,7 @@ uipc_attach(struct socket *so, int proto, struct thread *td) sendspace = unpsp_sendspace; recvspace = unpsp_recvspace; common: - /* - * XXXGL: we need to initialize the mutex with MTX_DUPOK. - * Ideally, protocols that have PR_SOCKBUF should be - * responsible for mutex initialization officially, and then - * this uglyness with mtx_destroy(); mtx_init(); would go away. - */ - mtx_destroy(&so->so_rcv_mtx); - mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF | MTX_DUPOK); + rcvmtxopts = MTX_DUPOK; knlist_init(&so->so_wrsel.si_note, so, uipc_wrknl_lock, uipc_wrknl_unlock, uipc_wrknl_assert_lock); STAILQ_INIT(&so->so_rcv.uxst_mbq); @@ -520,6 +514,8 @@ uipc_attach(struct socket *so, int proto, struct thread *td) default: panic("uipc_attach"); } + mtx_init(&so->so_rcv_mtx, "unix so_rcv", NULL, MTX_DEF | rcvmtxopts); + mtx_init(&so->so_snd_mtx, "unix so_snd", NULL, MTX_DEF); error = soreserve(so, sendspace, recvspace); if (error) return (error); @@ -888,6 +884,9 @@ uipc_detach(struct socket *so) MPASS(TAILQ_EMPTY(&so->so_rcv.uxdg_conns)); MPASS(STAILQ_EMPTY(&so->so_snd.uxdg_mb)); } + + mtx_destroy(&so->so_snd_mtx); + mtx_destroy(&so->so_rcv_mtx); } static int diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 8c58be63cd5..4b501b221bc 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -772,12 +772,21 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) NET_EPOCH_ASSERT(); /* - * Ok, create the full blown connection, and set things up - * as they would have been set up if we had created the - * connection when the SYN arrived. + * Creation of a socket via solisten_clone() bypasses call to pr_attach. + * That's why there is some pasted code from soattach() and from + * tcp_usr_attach() here. This should improve once TCP is PR_SOCKBUF. */ if ((so = solisten_clone(lso)) == NULL) goto allocfail; + mtx_init(&so->so_snd_mtx, "so_snd", NULL, MTX_DEF); + mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF); + so->so_snd.sb_mtx = &so->so_snd_mtx; + so->so_rcv.sb_mtx = &so->so_rcv_mtx; + error = soreserve(so, lso->sol_sbsnd_hiwat, lso->sol_sbrcv_hiwat); + if (error) { + sodealloc(so); + goto allocfail; + } #ifdef MAC mac_socketpeer_set_from_mbuf(m, so); #endif diff --git a/sys/netlink/netlink_domain.c b/sys/netlink/netlink_domain.c index 74b46114716..e906e0d635a 100644 --- a/sys/netlink/netlink_domain.c +++ b/sys/netlink/netlink_domain.c @@ -330,14 +330,17 @@ nl_attach(struct socket *so, int proto, struct thread *td) so, is_linux ? "(linux) " : "", curproc->p_pid, nl_get_proto_name(proto)); - nlp = malloc(sizeof(struct nlpcb), M_PCB, M_WAITOK | M_ZERO); + mtx_init(&so->so_snd_mtx, "netlink so_snd", NULL, MTX_DEF); + mtx_init(&so->so_rcv_mtx, "netlink so_rcv", NULL, MTX_DEF); error = soreserve(so, nl_sendspace, nl_recvspace); if (error != 0) { - free(nlp, M_PCB); + mtx_destroy(&so->so_snd_mtx); + mtx_destroy(&so->so_rcv_mtx); return (error); } TAILQ_INIT(&so->so_rcv.nl_queue); TAILQ_INIT(&so->so_snd.nl_queue); + nlp = malloc(sizeof(struct nlpcb), M_PCB, M_WAITOK | M_ZERO); so->so_pcb = nlp; nlp->nl_socket = so; nlp->nl_proto = proto; @@ -517,6 +520,9 @@ nl_close(struct socket *so) nl_buf_free(nb); } + mtx_destroy(&so->so_snd_mtx); + mtx_destroy(&so->so_rcv_mtx); + NL_LOG(LOG_DEBUG3, "socket %p, detached", so); /* XXX: is delayed free needed? */