/**************************************************************************** * fs/nfs/rpc_clnt.c * * Copyright (C) 2012 Gregory Nutt. All rights reserved. * Copyright (C) 2012 Jose Pablo Rojas Vargas. All rights reserved. * Author: Jose Pablo Rojas Vargas * * Leveraged from OpenBSD: * * Copyright (c) 2004 The Regents of the University of Michigan. * All rights reserved. * * Copyright (c) 2004 Weston Andros Adamson . * Copyright (c) 2004 Marius Aamodt Eriksen . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (c) 1989, 1991, 1993, 1995 The Regents of the University of * California. 
All rights reserved. * * This code is derived from software contributed to Berkeley by Rick Macklem at * The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 2. * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 3. All advertising * materials mentioning features or use of this software must display the * following acknowledgement: This product includes software developed by the * University of California, Berkeley and its contributors. 4. Neither the * name of the University nor the names of its contributors may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* ****************************************************************************/ #ifndef __FS_NFS_NFS_SOCKET_H #define __FS_NFS_NFS_SOCKET_H /**************************************************************************** * Included Files ****************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include "xdr_subs.h" #include "nfs_proto.h" #include "nfs.h" #include "nfs_args.h" #include "rpc.h" #include "rpc_clnt_private.h" #include "rpc_v2.h" /**************************************************************************** * Pre-processor Definitions ****************************************************************************/ #define RPC_RETURN(X) do { dbg("returning %d", X); return X; }while(0) /* Estimate rto for an nfs rpc sent via. an unreliable datagram. Use the mean * and mean deviation of rtt for the appropriate type of rpc for the frequent * rpcs and a default for the others. The justification for doing "other" * this way is that these rpcs happen so infrequently that timer est. would * probably be stale. Also, since many of these rpcs are non-idempotent, a * conservative timeout is desired. getattr, lookup - A+2D read, write - * A+4D other - nm_timeo */ #define RPC_RTO(n, t) \ ((t) == 0 ? (n)->rc_timeo : \ ((t) < 3 ? \ (((((n)->rc_srtt[t-1] + 3) >> 2) + (n)->rc_sdrtt[t-1] + 1) >> 1) : \ ((((n)->rc_srtt[t-1] + 7) >> 3) + (n)->rc_sdrtt[t-1] + 1))) #define RPC_SRTT(s,r) (r)->r_rpcclnt->rc_srtt[rpcclnt_proct((s),\ (r)->r_procnum) - 1] #define RPC_SDRTT(s,r) (r)->r_rpcclnt->rc_sdrtt[rpcclnt_proct((s),\ (r)->r_procnum) - 1] /* There is a congestion window for outstanding rpcs maintained per mount * point. The cwnd size is adjusted in roughly the way that: Van Jacobson, * Congestion avoidance and Control, In "Proceedings of SIGCOMM '88". ACM, * August 1988. describes for TCP. 
The cwnd size is chopped in half on a retransmit timeout and incremented by
 * 1/cwnd when each rpc reply is received and a full cwnd of rpcs is in
 * progress.  (The sent count and cwnd are scaled for integer arithmetic.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger), I suspect due to the large
 * rtt that nfs rpcs have.
 */

/* Congestion window fixed-point scaling (see rpcclnt_reply/rpcclnt_timer) */

#define RPC_CWNDSCALE 256
#define RPC_MAXCWND (RPC_CWNDSCALE * 32)

/* Number of entries in the human-readable error string tables below */

#define RPC_ERRSTR_ACCEPTED_SIZE 6
#define RPC_ERRSTR_AUTH_SIZE 6

/****************************************************************************
 * Public Data
 ****************************************************************************/

/* Error strings indexed by the accept status of an RPC_MSGACCEPTED reply */

char *rpc_errstr_accepted[RPC_ERRSTR_ACCEPTED_SIZE] =
{
  "",                           /* no good message... */
  "remote server hasn't exported program.",
  "remote server can't support version number.",
  "program can't support procedure.",
  "procedure can't decode params.",
  "remote error. remote side memory allocation failure?"
};

/* Error strings indexed by the reject status of an RPC_MSGDENIED reply */

char *rpc_errstr_denied[2] =
{
  "remote server doesnt support rpc version 2!",
  "remote server authentication error."
};

/* Error strings indexed by the auth status of an RPC_AUTHERR rejection */

char *rpc_errstr_auth[RPC_ERRSTR_AUTH_SIZE] =
{
  "",
  "auth error: bad credential (seal broken).",
  "auth error: client must begin new session.",
  "auth error: bad verifier (seal broken).",
  "auth error: verifier expired or replayed.",
  "auth error: rejected for security reasons.",
};

/****************************************************************************
 * Private Data
 ****************************************************************************/

/* Retransmit backoff multipliers, indexed by (rc_timeouts - 1) in
 * rpcclnt_timer().
 */

static int rpcclnt_backoff[8] =
{
  2, 4, 8, 16, 32, 64, 128, 256,
};

/* Static data, mostly RPC constants in XDR form (set once by
 * rpcclnt_init()).
 */

static uint32_t rpc_reply, rpc_call, rpc_vers, rpc_msgdenied, rpc_mismatch,
  rpc_auth_unix, rpc_msgaccepted, rpc_autherr, rpc_auth_null;

/* Transaction-id generator state shared by all clients; see
 * rpcclnt_buildheader().
 */

static uint32_t rpcclnt_xid = 0;
static uint32_t rpcclnt_xid_touched = 0;

int rpcclnt_ticks;              /* Timer granularity in system clock ticks */
struct rpcstats rpcstats;       /* Global RPC statistics counters */

/* NOTE(review): callmgs/replymsg are never assigned in this file but are
 * passed to rpcclnt_timer() — confirm they are set elsewhere before use.
 */

struct rpc_call *callmgs;
struct rpc_reply *replymsg;

/* Queue head for rpctask's.
 * NOTE(review): this is declared as a POINTER that is never pointed at any
 * storage, yet dq_init(rpctask_q) and rpctask_q->head are used throughout —
 * looks like it should be "static dq_queue_t rpctask_q;"; verify.
 */

static dq_queue_t *rpctask_q;

//struct callout_handle rpcclnt_timer_handle;

/****************************************************************************
 * Private Function Prototypes
 ****************************************************************************/

static int rpcclnt_send(struct socket *, struct sockaddr *,
                        struct rpc_call *, struct rpctask *);
static int rpcclnt_receive(struct rpctask *, struct sockaddr *,
                           struct rpc_reply *, struct rpc_call *);
static int rpcclnt_reply(struct rpctask *, struct rpc_call *,
                         struct rpc_reply *);
static void rpcclnt_timer(void *, struct rpc_call *);
#ifdef CONFIG_NFS_TCPIP
static int rpcclnt_sndlock(int *, struct rpctask *);
static void rpcclnt_sndunlock(int *);
static int rpcclnt_rcvlock(struct rpctask *);
static void rpcclnt_rcvunlock(int *);
static int rpcclnt_sigintr(struct rpcclnt *, struct rpctask *, cthread_t *);
#endif
static void rpcclnt_softterm(struct rpctask *task);
static uint32_t rpcclnt_proct(struct rpcclnt *, uint32_t);
static int rpcclnt_buildheader(struct rpcclnt *, int, int, void *,
                               struct rpc_call *);
/**************************************************************************** * Private Functions ****************************************************************************/ /* This is the nfs send routine. For connection based socket types, it must * be called with an nfs_sndlock() on the socket. "rep == NULL" indicates * that it has been called from a server. For the client side: - return EINTR * if the RPC is terminated, 0 otherwise - set TASK_MUSTRESEND if the send fails * for any reason - do any cleanup required by recoverable socket errors * (???) For the server side: - return EINTR or ERESTART if interrupted by a * signal - return EPIPE if a connection is lost for connection based sockets * (TCP...) - do any cleanup required by recoverable socket errors (???) */ static int rpcclnt_send(struct socket *so, struct sockaddr *nam, struct rpc_call *call, struct rpctask *rep) { struct sockaddr *sendnam; int error, soflags, flags; if (rep != NULL) { if (rep->r_flags & TASK_SOFTTERM) { RPC_RETURN(EINTR); } if ((so = rep->r_rpcclnt->rc_so) == NULL) { rep->r_flags |= TASK_MUSTRESEND; RPC_RETURN(0); } rep->r_flags &= ~TASK_MUSTRESEND; soflags = rep->r_rpcclnt->rc_soflags; } else { soflags = so->s_flags; } #ifdef CONFIG_NFS_TCPIP if ((soflags & PR_CONNREQUIRED)) { sendnam = NULL; { else #endif { sendnam = nam; } if (so->s_type == SOCK_SEQPACKET) { flags = MSG_EOR; } else { flags = 0; } error = psock_sendto(so, call, sizeof(*call), flags, sendnam, sizeof(*sendnam)); if (error != 0) { if (rep != NULL) { ndbg("rpc send error %d for service %s\n", error, rep->r_rpcclnt->rc_prog->prog_name); /* Deal with errors for the client side. */ if (rep->r_flags & TASK_SOFTTERM) { error = EINTR; } else { rep->r_flags |= TASK_MUSTRESEND; } } else { ndbg("rpc service send error %d\n", error); } /* Handle any recoverable (soft) socket errors here. 
*/ if (error != EINTR && error != ERESTART && error != EWOULDBLOCK && error != EPIPE) { error = 0; } } RPC_RETURN(error); } /* Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all * done by soreceive().For SOCK_STREAM, first get the * Record Mark to find out how much more there is to get. We must * lock the socket against other receivers until we have an entire * rpc request/reply. */ static int rpcclnt_receive(struct rpctask *rep, struct sockaddr *aname, struct rpc_reply *reply, struct rpc_call *call) { struct socket *so; #ifdef CONFIG_NFS_TCPIP uint32_t len; #endif int error, sotype, rcvflg; /* Set up arguments for soreceive() */ sotype = rep->r_rpcclnt->rc_sotype; /* For reliable protocols, lock against other senders/receivers in * case a reconnect is necessary. For SOCK_STREAM, first get the * Record Mark to find out how much more there is to get. We must * lock the socket against other receivers until we have an entire * rpc request/reply. */ #ifdef CONFIG_NFS_TCPIP if (sotype != SOCK_DGRAM) { error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep); if (error != 0) { return error; } tryagain: /* Check for fatal errors and resending request. * * Ugh: If a reconnect attempt just happened, rc_so would * have changed. NULL indicates a failed attempt that has * essentially shut down this mount point. 
*/ if (rep->r_flags & TASK_SOFTTERM) { rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag); return EINTR; } so = rep->r_rpcclnt->rc_so; if (so == NULL) { error = rpcclnt_reconnect(rep); if (error) { rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag); return error; } goto tryagain; } while (rep->r_flags & TASK_MUSTRESEND) { rpcstats.rpcretries++; error = rpcclnt_send(so, rep->r_rpcclnt->rc_name, call, rep); if (error) { if (error == EINTR || error == ERESTART || (error = rpcclnt_reconnect(rep)) != 0) { rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag); return error; } goto tryagain; } } rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag); if (sotype == SOCK_STREAM) { do { rcvflg = MSG_WAITALL; error = psock_recvfrom(so, reply, sizeof(*reply), &rcvflg, rep->r_rpcclnt->rc_name, sizeof(*rep->r_rpcclnt->rc_name)); if (error == EWOULDBLOCK && rep && (rep->r_flags & TASK_SOFTTERM)) { RPC_RETURN(EINTR); } } while (error == EWOULDBLOCK); if (error == 0) { ndbg("short receive from rpc server %s\n", rep->r_rpcclnt->rc_prog->prog_name); error = EPIPE; } len = ntohl(len) & ~0x80000000; /* This is SERIOUS! We are out of sync with the * sender and forcing a disconnect/reconnect is all I * can do. */ if (len > RPC_MAXPACKET) { ndbg("%s (%d) from rpc server %s\n", "impossible packet length", len, rep->r_rpcclnt->rc_prog->prog_name); error = EFBIG; goto errout; } do { rcvflg = MSG_WAITALL; error = psock_recvfrom(so, reply, sizeof(*reply), &rcvflg, rep->r_rpcclnt->rc_name, sizeof(*rep->r_rpcclnt->rc_name)); } while (error == EWOULDBLOCK || error == EINTR || error == ERESTART); if (error == 0) { ndbg("short receive from rpc server %s\n", rep->r_rpcclnt->rc_prog->prog_name); error = EPIPE; } if (error != 0) { goto errout; } } else { /* NB: Since uio_resid is big, MSG_WAITALL is ignored * and soreceive() will return when it has either a * control msg or a data msg. We have no use for * control msg., but must grab them and then throw * them away so we know what is going on. 
*/ do { rcvflg = 0; error = psock_recvfrom(so, reply, sizeof(*reply), &rcvflg, rep->r_rpcclnt->rc_name, sizeof(*rep->r_rpcclnt->rc_name)); if (error == EWOULDBLOCK && rep) { if (rep->r_flags & TASK_SOFTTERM) { return EINTR; } } } while (error == EWOULDBLOCK || (!error)); if ((rcvflg & MSG_EOR) == 0) { ndbg("Egad!!\n"); } if (error == 0) { error = EPIPE; } } errout: if (error != 0 && error != EINTR && error != ERESTART) { if (error != EPIPE) { ndbg("receive error %d from rpc server %s\n", error, rep->r_rpcclnt->rc_prog->prog_name); } error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep); if (error == 0) { error = rpcclnt_reconnect(rep); } if (error == 0) { goto tryagain; } } } else { #endif if ((so = rep->r_rpcclnt->rc_so) == NULL) { RPC_RETURN(EACCES); } do { rcvflg = 0; error = psock_recvfrom(so, reply, sizeof(*reply), rcvflg, aname, (socklen_t *) sizeof(*aname)); dbg("psock_recvfrom returns %d", error); if (error == EWOULDBLOCK && (rep->r_flags & TASK_SOFTTERM)) { dbg("wouldblock && softerm -> EINTR"); RPC_RETURN(EINTR); } } while (error == EWOULDBLOCK); #ifdef CONFIG_NFS_TCPIP } #endif RPC_RETURN(error); } /* Implement receipt of reply on a socket. We must search through the list of * received datagrams matching them with outstanding requests using the xid, * until ours is found. */ static int rpcclnt_reply(struct rpctask *myrep, struct rpc_call *call, struct rpc_reply *reply) { struct rpctask *rep; struct rpcclnt *rpc = myrep->r_rpcclnt; int32_t t1; struct sockaddr *nam = NULL; uint32_t rxid; int error; /* Loop around until we get our own reply */ for (;;) { /* Lock against other receivers so that I don't get stuck in * sbwait() after someone else has received my reply for me. * Also necessary for connection based protocols to avoid * race conditions during a reconnect. 
*/ #ifdef CONFIG_NFS_TCPIP error = rpcclnt_rcvlock(myrep); if (error) { return error; } #endif /* * Get the next Rpc reply off the socket */ error = rpcclnt_receive(myrep, nam, reply, call); #ifdef CONFIG_NFS_TCPIP rpcclnt_rcvunlock(&rpc->rc_flag); #endif if (error != 0) { /* Ignore routing errors on connectionless * protocols?? */ if (RPCIGNORE_SOERROR(rpc->rc_soflags, error)) { if (myrep->r_flags & TASK_GETONEREP) { RPC_RETURN(0); } ndbg("ingoring routing error on connectionless protocol."); continue; } RPC_RETURN(error); } /* Get the xid and check that it is an rpc reply */ rxid = reply->rp_xid; if (reply->rp_direction != rpc_reply) { rpcstats.rpcinvalid++; if (myrep->r_flags & TASK_GETONEREP) { RPC_RETURN(0); } continue; } /* Loop through the request list to match up the reply Iff no * match, just drop the datagram */ for (rep = (struct rpctask *)rpctask_q->head; rep; rep = (struct rpctask *)rep->r_chain.flink) { if (rxid == rep->r_xid) { /* Update congestion window. Do the additive * increase of one rpc/rtt. */ if (rpc->rc_cwnd <= rpc->rc_sent) { rpc->rc_cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (rpc->rc_cwnd >> 1)) / rpc->rc_cwnd; if (rpc->rc_cwnd > RPC_MAXCWND) { rpc->rc_cwnd = RPC_MAXCWND; } } rep->r_flags &= ~TASK_SENT; rpc->rc_sent -= RPC_CWNDSCALE; /* Update rtt using a gain of 0.125 on the * mean and a gain of 0.25 on the deviation. */ if (rep->r_flags & TASK_TIMING) { /* Since the timer resolution of * NFS_HZ is so course, it can often * result in r_rtt == 0. Since r_rtt * == N means that the actual rtt is * between N+dt and N+2-dt ticks, add * 1. */ t1 = rep->r_rtt + 1; t1 -= (RPC_SRTT(rpc, rep) >> 3); RPC_SRTT(rpc, rep) += t1; if (t1 < 0) { t1 = -t1; } t1 -= (RPC_SDRTT(rpc, rep) >> 2); RPC_SDRTT(rpc, rep) += t1; } rpc->rc_timeouts = 0; break; } } /* If not matched to a request, drop it. If it's mine, get * out. 
*/ if (rep == 0) { rpcstats.rpcunexpected++; dbg("rpc reply not matched\n"); } else if (rep == myrep) { RPC_RETURN(0); } if (myrep->r_flags & TASK_GETONEREP) { RPC_RETURN(0); } } } #ifdef CONFIG_NFS_TCPIP static int rpcclnt_sigintr( struct rpcclnt *rpc, struct rpctask *task, cthread_t *td) { struct proc *p; sigset_t tmpset; if (rpc == NULL) { return EFAULT; } if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT)) { return 0; } /* XXX deal with forced unmounts */ if (task && ISSET(task->r_flags, TASK_SOFTTERM)) { RPC_RETURN(EINTR); } if (!ISSET(rpc->rc_flag, RPCCLNT_INT)) { RPC_RETURN(0); } if (td == NULL) { return 0; } p = cthread_proc(td); PROC_LOCK(p); tmpset = p->p_siglist; SIGSETNAND(tmpset, td->td_sigmask); mtx_lock(&p->p_sigacts->ps_mtx); SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); mtx_unlock(&p->p_sigacts->ps_mtx); if (SIGNOTEMPTY(p->p_siglist) && RPCCLNTINT_SIGMASK(tmpset)) { PROC_UNLOCK(p); RPC_RETURN(EINTR); } PROC_UNLOCK(p); RPC_RETURN(0); } /* Lock a socket against others. Necessary for STREAM sockets to ensure you * get an entire rpc request/reply and also to avoid race conditions between * the processes with nfs requests in progress when a reconnect is necessary. */ static int rpcclnt_sndlock(int *flagp, struct rpctask *task) { int slpflag = 0, slptimeo = 0; if (task) { if (task->r_rpcclnt->rc_flag & RPCCLNT_INT) slpflag = PCATCH; } while (*flagp & RPCCLNT_SNDLOCK) { if (rpcclnt_sigintr(task->r_rpcclnt, task, p)) { return EINTR; } *flagp |= RPCCLNT_WANTSND; if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * CLOCKS_PER_SEC; } } *flagp |= RPCCLNT_SNDLOCK; return 0; } /* Unlock the stream socket for others. 
*/ static void rpcclnt_sndunlock(int *flagp) { if ((*flagp & RPCCLNT_SNDLOCK) == 0) { panic("rpc sndunlock"); } *flagp &= ~RPCCLNT_SNDLOCK; if (*flagp & RPCCLNT_WANTSND) { *flagp &= ~RPCCLNT_WANTSND; } } static int rpcclnt_rcvlock(struct rpctask *task) { int *flagp = &task->r_rpcclnt->rc_flag; int slpflag, slptimeo = 0; if (*flagp & RPCCLNT_INT) { slpflag = PCATCH; } else { slpflag = 0; } while (*flagp & RPCCLNT_RCVLOCK) { if (rpcclnt_sigintr(task->r_rpcclnt, task, task->r_td)) { return EINTR; } *flagp |= RPCCLNT_WANTRCV; tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcrcvlk", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * CLOCKS_PER_SEC; } } *flagp |= RPCCLNT_RCVLOCK; return 0; } /* Unlock the stream socket for others. */ static void rpcclnt_rcvunlock(int *flagp) { if ((*flagp & RPCCLNT_RCVLOCK) == 0) { panic("nfs rcvunlock"); } *flagp &= ~RPCCLNT_RCVLOCK; if (*flagp & RPCCLNT_WANTRCV) { *flagp &= ~RPCCLNT_WANTRCV; wakeup((caddr_t) flagp); } } #endif static uint32_t rpcclnt_proct(struct rpcclnt *rpc, uint32_t procid) { if (rpc->rc_proctlen != 0 && rpc->rc_proct != NULL && procid < rpc->rc_proctlen) { return rpc->rc_proct[procid]; } return 0; } static void rpcclnt_softterm(struct rpctask *task) { task->r_flags |= TASK_SOFTTERM; if (task->r_flags & TASK_SENT) { task->r_rpcclnt->rc_sent -= RPC_CWNDSCALE; task->r_flags &= ~TASK_SENT; } } /**************************************************************************** * Public Functions ****************************************************************************/ void rpcclnt_init(void) { rpcclnt_ticks = (CLOCKS_PER_SEC * RPC_TICKINTVL + 500) / 1000; if (rpcclnt_ticks < 1) { rpcclnt_ticks = 1; } rpcstats.rpcretries = 0; rpcstats.rpcrequests = 0; rpcstats.rpctimeouts = 0; rpcstats.rpcunexpected = 0; rpcstats.rpcinvalid = 0; /* RPC constants how about actually using more than one of these! 
*/ rpc_reply = txdr_unsigned(RPC_REPLY); rpc_vers = txdr_unsigned(RPC_VER2); rpc_call = txdr_unsigned(RPC_CALL); rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); rpc_mismatch = txdr_unsigned(RPC_MISMATCH); rpc_autherr = txdr_unsigned(RPC_AUTHERR); rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); rpc_auth_null = txdr_unsigned(RPCAUTH_NULL); /* Initialize rpctask queue */ dq_init(rpctask_q); rpcclnt_timer(NULL, callmgs); nvdbg("rpc initialed"); return; } /* void rpcclnt_uninit(void) { nvdbg("uninit"); untimeout(rpcclnt_timer, (void *)NULL, rpcclnt_timer_handle); } */ /* Initialize sockets and congestion for a new RPC connection. We do not free * the sockaddr if error. */ int rpcclnt_connect(struct rpcclnt *rpc) { struct socket *so; int error; struct sockaddr *saddr; struct sockaddr_in *sin = NULL; struct timeval *tv = NULL; uint16_t tport; /* Create the socket */ rpc->rc_so = NULL; saddr = rpc->rc_name; rpc->rc_sotype = SOCK_DGRAM; error = psock_socket(saddr->sa_family, rpc->rc_sotype, rpc->rc_soproto, rpc->rc_so); if (error != 0) { ndbg("error %d in psock_socket()", error); RPC_RETURN(error); } so = rpc->rc_so; rpc->rc_soflags = so->s_flags; /* Some servers require that the client port be a reserved port * number. We always allocate a reserved port, as this prevents * filehandle disclosure through UDP port capture. */ sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; tport = 1024; do { tport--; sin->sin_port = htons(tport); error = psock_bind(so, (struct sockaddr *)sin, sizeof(*sin)); } while (error == EADDRINUSE && tport > 1024 / 2); if (error) { ndbg("bind failed\n"); goto bad; } /* Protocols that do not require connections may be optionally left * unconnected for servers that reply from a port other than * NFS_PORT. 
*/ #ifdef CONFIG_NFS_TCPIP if (rpc->rc_soflags == PR_CONNREQUIRED) { error = ENOTCONN; goto bad; } else { #endif error = psock_connect(so, saddr, sizeof(*saddr)); if (error) { dbg("psock_connect returns %d", error); goto bad; } #ifdef CONFIG_NFS_TCPIP } #endif /* Always set receive timeout to detect server crash and reconnect. * Otherwise, we can get stuck in psock_receive forever. */ tv->tv_sec = 1; tv->tv_usec = 0; if ((error = psock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, (const void *)tv, sizeof(*tv)))) { goto bad; } /* Initialize other non-zero congestion variables */ rpc->rc_srtt[0] = rpc->rc_srtt[1] = rpc->rc_srtt[2] = rpc->rc_srtt[3] = (RPC_TIMEO << 3); rpc->rc_sdrtt[0] = rpc->rc_sdrtt[1] = rpc->rc_sdrtt[2] = rpc->rc_sdrtt[3] = 0; rpc->rc_cwnd = RPC_MAXCWND / 2; /* Initial send window */ rpc->rc_sent = 0; rpc->rc_timeouts = 0; RPC_RETURN(0); bad: rpcclnt_disconnect(rpc); RPC_RETURN(error); } /* Reconnect routine: Called when a connection is broken on a reliable * protocol. - clean up the old socket - nfs_connect() again - set * TASK_MUSTRESEND for all outstanding requests on mount point If this * fails the mount point is DEAD! nb: Must be called with the * nfs_sndlock() set on the mount point. */ int rpcclnt_reconnect(struct rpctask *rep) { struct rpctask *rp; struct rpcclnt *rpc = rep->r_rpcclnt; int error; rpcclnt_disconnect(rpc); while ((error = rpcclnt_connect(rpc)) != 0) { if (error == EINTR || error == ERESTART) { return EINTR; } } /* Loop through outstanding request list and fix up all * requests on old socket. 
*/ for (rp = (struct rpctask *)rpctask_q->head; rp != NULL; rp = (struct rpctask *)rp->r_chain.blink) { if (rp->r_rpcclnt == rpc) { rp->r_flags |= TASK_MUSTRESEND; } } return 0; } void rpcclnt_disconnect(struct rpcclnt *rpc) { struct socket *so; if (rpc->rc_so != NULL) { so = rpc->rc_so; rpc->rc_so = NULL; (void)psock_close(so); } } #ifdef CONFIG_NFS_TCPIP void rpcclnt_safedisconnect(struct rpcclnt *rpc) { struct rpctask dummytask; memset((void *)dummytask, 0, sizeof(*call)); dummytask.r_rpcclnt = rpc; rpcclnt_rcvlock(&dummytask); rpcclnt_disconnect(rpc); rpcclnt_rcvunlock(&rpc->rc_flag); } #endif /* XXX: ignores tryagain! */ /* Code from nfs_request - goes something like this - fill in task struct - * links task into list - calls nfs_send() for first transmit - calls * nfs_receive() to get reply - fills in reply (which should be initialized * prior to calling), which is valid when 0 is returned and is NEVER freed in * this function * * always frees the request header, but NEVER frees 'mrest' * * * note that reply->result_* are invalid unless reply->type == * RPC_MSGACCEPTED and reply->status == RPC_SUCCESS and that reply->verf_* * are invalid unless reply->type == RPC_MSGACCEPTED */ int rpcclnt_request(struct rpcclnt *rpc, int procnum, struct rpc_reply *reply, void *datain) { struct rpc_call *call = NULL; struct rpc_reply *replysvr = NULL; struct rpctask *task, _task; int error = 0; int xid = 0; task = &_task; memset(task, 0, sizeof(*task)); task->r_rpcclnt = rpc; task->r_procnum = procnum; error = rpcclnt_buildheader(rpc, procnum, xid, datain, call); if (error) { ndbg("building call header error"); goto rpcmout; } task->r_xid = fxdr_unsigned(uint32_t, xid); if (rpc->rc_flag & RPCCLNT_SOFT) { task->r_retry = rpc->rc_retry; } else { task->r_retry = RPC_MAXREXMIT + 1; /* past clip limit */ } task->r_rtt = task->r_rexmit = 0; if (rpcclnt_proct(rpc, procnum) > 0) { task->r_flags = TASK_TIMING; } else { task->r_flags = 0; } /* Do the client side RPC. 
*/ rpcstats.rpcrequests++; /* Chain request into list of outstanding requests. Be sure to put it * LAST so timer finds oldest requests first. */ dq_addlast(&task->r_chain, rpctask_q); /* If backing off another request or avoiding congestion, don't send * this one now but let timer do it. If not timing a request, do it * now. */ if (rpc->rc_so && (rpc->rc_sotype != SOCK_DGRAM || (rpc->rc_flag & RPCCLNT_DUMBTIMR) || rpc->rc_sent < rpc->rc_cwnd)) { #ifdef CONFIG_NFS_TCPIP if (rpc->rc_soflags & PR_CONNREQUIRED) { error = rpcclnt_sndlock(&rpc->rc_flag, task); } #endif if (error == 0) { error = rpcclnt_send(rpc->rc_so, rpc->rc_name, call, task); #ifdef CONFIG_NFS_TCPIP if (rpc->rc_soflags & PR_CONNREQUIRED) { rpcclnt_sndunlock(&rpc->rc_flag); } #endif } if (error == 0 && (task->r_flags & TASK_MUSTRESEND) == 0) { rpc->rc_sent += RPC_CWNDSCALE; task->r_flags |= TASK_SENT; } } else { task->r_rtt = -1; } /* Wait for the reply from our send. */ if (error == 0 || error == EPIPE) { error = rpcclnt_reply(task, call, replysvr); } /* RPC done, unlink the request. */ dq_rem(&task->r_chain, rpctask_q); /* Decrement the outstanding request count. 
*/ if (task->r_flags & TASK_SENT) { task->r_flags &= ~TASK_SENT; /* paranoia */ rpc->rc_sent -= RPC_CWNDSCALE; } if (error != 0) { goto rpcmout; } /* Break down the rpc header and check if ok */ reply->stat.type = fxdr_unsigned(uint32_t, replysvr->stat.type); if (reply->stat.type == RPC_MSGDENIED) { reply->stat.status = fxdr_unsigned(uint32_t, replysvr->stat.status); switch (reply->stat.status) { case RPC_MISMATCH: reply->stat.mismatch_info.low = fxdr_unsigned(uint32_t, replysvr->stat.mismatch_info.low); reply->stat.mismatch_info.high = fxdr_unsigned(uint32_t, replysvr->stat.mismatch_info.high); ndbg("RPC_MSGDENIED: RPC_MISMATCH error"); error = EOPNOTSUPP; break; case RPC_AUTHERR: reply->stat.autherr = fxdr_unsigned(uint32_t, replysvr->stat.autherr); ndbg("RPC_MSGDENIED: RPC_AUTHERR error"); error = EACCES; break; default: error = EOPNOTSUPP; break; } goto rpcmout; } else if (reply->stat.type != RPC_MSGACCEPTED) { error = EOPNOTSUPP; goto rpcmout; } /* Verifier */ reply->rpc_verfi.authtype = fxdr_unsigned(uint32_t, replysvr->rpc_verfi.authtype); reply->rpc_verfi.authlen = fxdr_unsigned(uint32_t, replysvr->rpc_verfi.authlen); if (reply->stat.status == RPC_SUCCESS) { nvdbg("RPC_SUCCESS"); reply->stat.where = replysvr->stat.where; } else if (reply->stat.status == RPC_PROGMISMATCH) { reply->stat.mismatch_info.low = fxdr_unsigned(uint32_t, replysvr->stat.mismatch_info.low); reply->stat.mismatch_info.high = fxdr_unsigned(uint32_t, replysvr->stat.mismatch_info.high); ndbg("RPC_MSGACCEPTED: RPC_PROGMISMATCH error"); error = EOPNOTSUPP; /* XXXMARIUS */ } else if (reply->stat.status > 5) { error = EOPNOTSUPP; goto rpcmout; } rpcmout: RPC_RETURN(error); } /* Nfs timer routine Scan the nfsreq list and retranmit any requests that * have timed out To avoid retransmission attempts on STREAM sockets (in the * future) make sure to set the r_retry field to 0 (implies nm_retry == 0). 
*/ void rpcclnt_timer(void *arg, struct rpc_call *call) { struct rpctask *rep; struct socket *so; struct rpcclnt *rpc; int timeo, error; for (rep = (struct rpctask *)rpctask_q->head; rep; rep = (struct rpctask *)rep->r_chain.flink) { rpc = rep->r_rpcclnt; if (rep->r_flags & TASK_SOFTTERM) { continue; } if (rep->r_rtt >= 0) { rep->r_rtt++; if (rpc->rc_flag & RPCCLNT_DUMBTIMR) { timeo = rpc->rc_timeo; } else { timeo = RPC_RTO(rpc, rpcclnt_proct(rep->r_rpcclnt, rep->r_procnum)); } if (rpc->rc_timeouts > 0) { timeo *= rpcclnt_backoff[rpc->rc_timeouts - 1]; } if (rep->r_rtt <= timeo) { continue; } if (rpc->rc_timeouts < 8) { rpc->rc_timeouts++; } } /* Check for server not responding */ if ((rep->r_flags & TASK_TPRINTFMSG) == 0 && rep->r_rexmit > rpc->rc_deadthresh) { ndbg("Server is not responding"); rep->r_flags |= TASK_TPRINTFMSG; } if (rep->r_rexmit >= rep->r_retry) { /* too many */ rpcstats.rpctimeouts++; rep->r_flags |= TASK_SOFTTERM; continue; } if (rpc->rc_sotype != SOCK_DGRAM) { if (++rep->r_rexmit > RPC_MAXREXMIT) { rep->r_rexmit = RPC_MAXREXMIT; } continue; } if ((so = rpc->rc_so) == NULL) { continue; } /* If there is enough space and the window allows.. Resend it * Set r_rtt to -1 in case we fail to send it now. */ rep->r_rtt = -1; if ((rpc->rc_flag & RPCCLNT_DUMBTIMR) || (rep->r_flags & TASK_SENT) || rpc->rc_sent < rpc->rc_cwnd) { if ((rpc->rc_flag & RPCCLNT_NOCONN) == 0) { error = psock_sendto(so, call, sizeof(*call), 0, NULL, 0); } else { error = psock_sendto(so, call, sizeof(*call), 0, rpc->rc_name, sizeof(*rpc->rc_name)); } if (!error) { /* Iff first send, start timing else turn * timing off, backoff timer and divide * congestion window by 2. 
*/ if (rep->r_flags & TASK_SENT) { rep->r_flags &= ~TASK_TIMING; if (++rep->r_rexmit > RPC_MAXREXMIT) { rep->r_rexmit = RPC_MAXREXMIT; } rpc->rc_cwnd >>= 1; if (rpc->rc_cwnd < RPC_CWNDSCALE) { rpc->rc_cwnd = RPC_CWNDSCALE; } rpcstats.rpcretries++; } else { rep->r_flags |= TASK_SENT; rpc->rc_sent += RPC_CWNDSCALE; } rep->r_rtt = 0; } } } // rpcclnt_timer_handle = timeout(rpcclnt_timer, NULL, rpcclnt_ticks); } /* Build the RPC header and fill in the authorization info. */ int rpcclnt_buildheader(struct rpcclnt *rc, int procid, int xidp, void *datain, struct rpc_call *call) { struct timeval *tv = NULL; srand(time(NULL)); /* The RPC header.*/ /* Get a new (non-zero) xid */ if ((rpcclnt_xid == 0) && (rpcclnt_xid_touched == 0)) { rpcclnt_xid = rand(); rpcclnt_xid_touched = 1; } else { do { xidp = rand(); } while ((xidp % 256) == 0); rpcclnt_xid += xidp; } call->rp_xid = xidp = txdr_unsigned(rpcclnt_xid); call->rp_direction = rpc_call; call->rp_rpcvers = rpc_vers; call->rp_prog = txdr_unsigned(rc->rc_prog->prog_id); call->rp_vers = txdr_unsigned(rc->rc_prog->prog_version); call->rp_proc = txdr_unsigned(procid); call->data = datain; /* rpc_auth part (auth_unix as root) */ call->rpc_auth.authtype = rpc_auth_null; call->rpc_auth.authlen = txdr_unsigned(sizeof(NULL)); tv->tv_sec = 1; tv->tv_usec = 0; #ifdef CONFIG_NFS_UNIX_AUTH call->rpc_unix.ua_time = txdr_unsigned(tv->tv_sec); call->rpc_unix.ua_hostname = 0; call->rpc_unix.ua_uid = geteuid(); call->rpc_unix.ua_gid = getegid(); call->rpc_unix.ua_gidlist = 0; #endif /* rpc_verf part (auth_null) */ call->rpc_verf.authtype = 0; call->rpc_verf.authlen = 0; return 0; } int rpcclnt_cancelreqs(struct rpcclnt *rpc) { struct rpctask *task; int i; for (task = (struct rpctask *)rpctask_q->head; task; task = (struct rpctask *)task->r_chain.flink) { if (rpc != task->r_rpcclnt || (task->r_flags & TASK_SOFTTERM)) { continue; } rpcclnt_softterm(task); } for (i = 0; i < 30; i++) { for (task = (struct rpctask *)rpctask_q->head; task; task = 
(struct rpctask *)task->r_chain.flink) { if (rpc == task->r_rpcclnt) { break; } } if (task == NULL) { return 0; } } return EBUSY; } #endif