2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include "afs/param.h"
14 #include "rx/rx_kcommon.h"
15 #include "rx/rx_packet.h"
16 #include "h/tcp-param.h"
17 /* This must be loaded after proc.h to avoid macro collision with a variable*/
18 #include "netinet/udp_var.h"
23 #ifdef RXK_LISTENER_ENV
25 * OS dependent part of kernel RX listener thread.
28 * so socket to receive on, typically rx_socket
29 * from pointer to a sockaddr_in.
30 * iov array of iovecs to fill in.
31 * iovcnt how many iovecs there are.
32 * lengthp IN/OUT in: total space available in iovecs. out: size of read.
36 * error code (such as EINTR) if not
39 * Note that the maximum number of iovecs is 2 + RX_MAXWVECS. This is
40 * so we have a little space to look for packets larger than
/* Diagnostics for socket-level receive errors seen by the RX listener
 * thread (updated in osi_NetReceive below).  The stray line-number tokens
 * that had been fused into these declarations were removed; they made the
 * definitions syntactically invalid C. */
int rxk_lastSocketError = 0;	/* most recent error code from soreceive */
int rxk_nSocketErrors = 0;	/* running count of socket errors observed */
int rxk_nSignalsCleared = 0;	/* times pending signals were cleared off the listener thread */
/*
 * osi_NetReceive - OS-dependent receive for the kernel RX listener thread
 * (RXK_LISTENER_ENV).  Reads one datagram from socket `so` into the iovec
 * array `dvec` (nvecs entries) via a temporary uio built over tmpvec;
 * *alength is IN/OUT: total space available on entry, bytes read on return
 * (see the block comment above this function).
 * NOTE(review): this extract is missing intermediate source lines (the
 * declarations of `tuio`, `bhv` and `code`, plus the surrounding #ifdef
 * control flow), so the comments below cover only the visible statements.
 */
48 osi_NetReceive(osi_socket so
, struct sockaddr_in
*addr
, struct iovec
*dvec
,
49 int nvecs
, int *alength
)
53 struct mbuf
*maddr
= NULL
;
54 struct sockaddr_in
*taddr
;
55 struct iovec tmpvec
[RX_MAXWVECS
+ 2];
/* IRIX behavior handle: attach the socket as the behavior's private data --
 * presumably so soreceive(&bhv, ...) below can find it; TODO confirm against
 * the missing #ifdef lines. */
58 BHV_PDATA(&bhv
) = (void *)so
;
/* Zero the uio and the iovec scratch array before building the request. */
61 memset(&tuio
, 0, sizeof(tuio
));
62 memset(&tmpvec
, 0, sizeof(tmpvec
));
64 tuio
.uio_iov
= tmpvec
;
65 tuio
.uio_iovcnt
= nvecs
;
67 tuio
.uio_segflg
= AFS_UIOSYS
;
69 tuio
.uio_resid
= *alength
;
/* Refuse impossible iovec counts: panic rather than overrun tmpvec. */
73 if (nvecs
> RX_MAXWVECS
+ 2) {
74 osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", nvecs
);
/* Copy the caller's iovecs into the local scratch array. */
76 memcpy(tmpvec
, (char *)dvec
, (RX_MAXWVECS
+ 1) * sizeof(struct iovec
));
/* Receive the datagram; maddr gets an mbuf holding the source address.
 * NOTE(review): two soreceive() call forms appear (behavior handle vs raw
 * socket); the #if/#else lines selecting between them are not visible in
 * this extract. */
78 code
= soreceive(&bhv
, &maddr
, &tuio
, NULL
, NULL
);
80 code
= soreceive(so
, &maddr
, &tuio
, NULL
, NULL
);
85 /* Clear the error before using the socket again. I've tried being nice
86 * and blocking SIGKILL and SIGSTOP from the kernel, but they get
87 * delivered anyway. So, time to be crude and just clear the signals
88 * pending on this thread.
91 uthread_t
*ut
= curuthread
;
94 sigemptyset(&ut
->ut_sig
);
96 thread_interrupt_clear(UT_TO_KT(ut
), 1);
/* Count each time we had to clear signals, for diagnostics. */
98 rxk_nSignalsCleared
++;
101 /* Clear the error before using the socket again. */
/* Remember the most recent error code for diagnostics. */
103 rxk_lastSocketError
= code
;
/* Bytes actually read = space offered minus what soreceive left unused. */
108 *alength
= *alength
- tuio
.uio_resid
;
/* Copy the datagram's source sockaddr_in out of the address mbuf. */
110 memcpy((char *)addr
, (char *)mtod(maddr
, struct sockaddr_in
*),
111 sizeof(struct sockaddr_in
));
119 #else /* RXK_LISTENER_ENV */
/* Saved copy of the original UDP protocol-switch entry; rxk_input and
 * rxk_fasttimo chain to its pr_input / pr_fasttimo after RX processing. */
121 static struct protosw parent_proto
; /* udp proto switch */
124 * RX input, fast timer and initialization routines.
/*
 * rxk_input - intercepted UDP input routine (non-listener build).  It is
 * installed over the inet protocol switch by the interception code below.
 * For each incoming UDP datagram: pull up the IP+UDP headers, and if the
 * destination port is one of rxk_ports[], verify the UDP checksum and hand
 * the payload to RX via (*rxk_GetPacketProc) / (*rxk_PacketArrivalProc);
 * otherwise fall through to the original udp input saved in parent_proto.
 * NOTE(review): two parameter lists appear below (with and without the
 * `struct ipsec *spec` argument); the #ifdef lines selecting between them,
 * and several local declarations (ti, tu, tsp, port, i, tlen, data_len,
 * phandle, tproc), are not visible in this extract.
 */
129 rxk_input(struct mbuf
*am
, struct ifnet
*aif
, struct ipsec
*spec
)
132 rxk_input(struct mbuf
*am
, struct ifnet
*aif
)
140 struct udpiphdr
*tvu
;
143 struct sockaddr_in taddr
;
148 /* make sure we have base ip and udp headers in first mbuf */
149 if (am
->m_off
> MMAXOFF
|| am
->m_len
< 28) {
150 am
= m_pullup(am
, 28);
/* IP header length in 32-bit words, read from the (pulled-up) first mbuf. */
155 hdr
= (mtod(am
, struct ip
*))->ip_hl
;
157 /* pull up more, the IP hdr is bigger than usual */
158 if (am
->m_len
< (8 + (hdr
<< 2))) {
159 am
= m_pullup(am
, 8 + (hdr
<< 2));
163 ti
= mtod(am
, struct ip
*); /* recompute, since m_pullup allocates new mbuf */
164 tu
= (struct udphdr
*)(((char *)ti
) + (hdr
<< 2)); /* skip ip hdr */
166 ti
= mtod(am
, struct ip
*);
167 tu
= (struct udphdr
*)(((char *)ti
) + 20); /* skip basic ip hdr */
169 /* now read the port out */
/* Scan the registered RX ports for a match on the destination port. */
173 for (tsp
= rxk_ports
, i
= 0; i
< MAXRXPORTS
; i
++) {
174 if (*tsp
++ == port
) {
175 /* checksum the packet */
177 ip_stripoptions(am
, (struct mbuf
*)0); /* get rid of anything we don't need */
178 tu
= (struct udphdr
*)(((char *)ti
) + 20);
181 * Make mbuf data length reflect UDP length.
182 * If not enough data to reflect UDP length, drop.
184 tvu
= (struct udpiphdr
*)ti
;
185 tlen
= ntohs((u_short
) tvu
->ui_ulen
);
186 if ((int)ti
->ip_len
!= tlen
) {
187 if (tlen
> (int)ti
->ip_len
) {
/* Trim excess mbuf data down to the UDP-declared length. */
191 m_adj(am
, tlen
- (int)ti
->ip_len
);
193 /* deliver packet to rx */
194 taddr
.sin_family
= AF_INET
; /* compute source address */
195 taddr
.sin_port
= tu
->uh_sport
;
196 taddr
.sin_addr
.s_addr
= ti
->ip_src
.s_addr
;
197 /* handle the checksum. Note that this code damages the actual ip
198 * header (replacing it with the virtual one, which is the same size),
199 * so we must ensure we get everything out we need, first */
200 if (tu
->uh_sum
!= 0) {
201 /* if the checksum is there, always check it. It's crazy not
202 * to, unless you can really be sure that your
203 * underlying network (and interfaces and drivers and
204 * DMA hardware, etc!) is error-free. First, fill
205 * in entire virtual ip header. */
209 tvu
->ui_len
= tvu
->ui_ulen
;
210 tlen
= ntohs((unsigned short)(tvu
->ui_ulen
));
/* Skip software checksumming when hardware already verified it. */
211 if ((!(am
->m_flags
& M_CKSUMMED
))
212 && in_cksum(am
, sizeof(struct ip
) + tlen
)) {
213 /* checksum, including cksum field, doesn't come out 0, so
214 * this packet is bad */
221 * 28 is IP (20) + UDP (8) header. ulen includes
222 * udp header, and we *don't* tell RX about udp
223 * header either. So, we remove those 8 as well.
225 data_len
= ntohs(tu
->uh_ulen
);
/* Ask RX for a packet buffer large enough for data_len bytes. */
227 if (!(*rxk_GetPacketProc
) (&phandle
, data_len
)) {
228 if (rx_mb_to_packet(am
, m_freem
, 28, data_len
, phandle
)) {
229 /* XXX should just increment counter here.. */
230 printf("rx: truncated UDP packet\n");
231 rxi_FreePacket(phandle
);
/* Hand the filled packet to RX with source address and the port's rock. */
233 (*rxk_PacketArrivalProc
) (phandle
, &taddr
,
234 rxk_portRocks
[i
], data_len
);
242 /* if we get here, try to deliver packet to udp */
243 if (tproc
= parent_proto
.pr_input
)
/*
 * rxk_fasttimo (body fragment): periodic UDP fast-timer hook.  Raises any
 * due RX events, then chains to the original protocol fast-timer saved in
 * parent_proto.  NOTE(review): the function header and several surrounding
 * lines are not visible in this extract.
 */
249 * UDP fast timer to raise events for all but Solaris and NCR.
250 * Called about 5 times per second (at unknown priority?). Must go to
251 * splnet or obtain global lock before touching anything significant.
259 /* do rx fasttimo processing here */
260 rxevent_RaiseEvents(&temp
);
261 if (tproc
= parent_proto
.pr_fasttimo
)
/*
 * Protocol-switch interception (body fragment): walk inetdomain's protosw
 * table, save the original UDP entry in parent_proto, then splice rxk_input
 * and rxk_fasttimo in as the UDP input and fast-timer routines.  Panics if
 * no UDP entry is found.  NOTE(review): the enclosing function header is
 * not visible in this extract.
 */
266 /* start intercepting basic calls */
270 struct protosw
*tpro
, *last
;
274 last
= inetdomain
.dom_protoswNPROTOSW
;
275 for (tpro
= inetdomain
.dom_protosw
; tpro
< last
; tpro
++) {
276 if (tpro
->pr_protocol
== IPPROTO_UDP
) {
/* Keep a copy of the original entry so we can chain to it later. */
277 memcpy(&parent_proto
, tpro
, sizeof(parent_proto
));
278 tpro
->pr_input
= rxk_input
;
279 tpro
->pr_fasttimo
= rxk_fasttimo
;
284 osi_Panic("inet:no udp");
286 #endif /* RXK_LISTENER_ENV */
289 * RX IP address routines.
/* Cached per-interface tables, filled in by rxi_EnumGetIfInfo below. */
292 static afs_uint32 myNetAddrs
[ADDRSPERSITE
];
/* MTU (minus RX_IPUDP_SIZE) recorded per interface slot. */
293 static int myNetMTUs
[ADDRSPERSITE
];
/* Per-interface flags; not referenced in the visible code of this extract. */
294 static int myNetFlags
[ADDRSPERSITE
];
/* Number of valid entries; 0 means the tables need refreshing. */
295 static int numMyNetAddrs
= 0;
297 /* This version doesn't even begin to handle iterative requests, but then
298 * we don't yet use them anyway. Fix this when rxi_InitPeerParams is changed
299 * to find a true maximum.
/*
 * rxi_MatchIfnet - hash_enum callback scoring how well the in_ifaddr in
 * bucket `h` matches the peer address *key (host order).  arg1 points to
 * the best match value so far, arg2 to the best in_ifaddr; both are updated
 * when this entry matches more specifically (net -> subnet -> exact self).
 * NOTE(review): the line assigning `sin` and the constants stored into
 * match_value are missing from this extract.
 */
302 rxi_MatchIfnet(struct hashbucket
*h
, caddr_t key
, caddr_t arg1
, caddr_t arg2
)
304 afs_uint32 ppaddr
= *(afs_uint32
*) key
;
305 int match_value
= *(int *)arg1
;
306 struct in_ifaddr
*ifa
= (struct in_ifaddr
*)h
;
307 struct sockaddr_in
*sin
;
/* Network-level match first, then subnet, then exact address. */
309 if ((ppaddr
& ifa
->ia_netmask
) == ifa
->ia_net
) {
310 if ((ppaddr
& ifa
->ia_subnetmask
) == ifa
->ia_subnet
) {
312 if (sin
->sin_addr
.s_addr
== ppaddr
) { /* ie, ME!!! */
314 *(struct in_ifaddr
**)arg2
= ifa
;
316 if (match_value
< 3) {
*(struct in_ifaddr
**)arg2
= ifa
;
321 if (match_value
< 2) {
322 *(struct in_ifaddr
**)arg2
= ifa
;
/* Report the best match value back through arg1. */
327 *(int *)arg1
= match_value
;
/*
 * rxi_FindIfnet - locate the local interface matching `addr` (network
 * order) and write its subnet mask to *maskp.  Refreshes the cached
 * interface tables via rxi_GetIFInfo() on first use, then scans the kernel
 * in_ifaddr hash with the rxi_MatchIfnet callback.
 * NOTE(review): declarations of ppaddr/match_value and part of the
 * hash_enum argument list are missing from this extract.
 */
333 rxi_FindIfnet(afs_uint32 addr
, afs_uint32
* maskp
)
337 struct in_ifaddr
*ifad
;
339 if (numMyNetAddrs
== 0)
340 (void)rxi_GetIFInfo();
/* Work in host byte order for the mask comparisons in rxi_MatchIfnet. */
342 ppaddr
= ntohl(addr
);
343 ifad
= (struct in_ifaddr
*)&hashinfo_inaddr
;
345 (void)hash_enum(&hashinfo_inaddr
, rxi_MatchIfnet
, HTF_INET
,
346 (caddr_t
) & ppaddr
, (caddr_t
) & match_value
,
/* Return the matched interface's subnet mask to the caller. */
351 *maskp
= ifad
->ia_subnetmask
;
/*
 * rxi_EnumGetIfInfo - hash_enum callback that records each interface's
 * address and usable MTU into the myNetAddrs/myNetMTUs tables (slot index
 * *arg2, advanced on return) and raises rx_maxReceiveSize when a
 * non-loopback interface permits a larger fragmented receive.  *arg1 is
 * set nonzero when an address changed since the last scan.
 * NOTE(review): the fourth parameter line and the setup of `ifnp`,
 * `rxmtu`, `ifinaddr` and `different` are missing from this extract.
 */
358 rxi_EnumGetIfInfo(struct hashbucket
*h
, caddr_t key
, caddr_t arg1
,
361 int different
= *(int *)arg1
;
362 int i
= *(int *)arg2
;
363 struct in_ifaddr
*iap
= (struct in_ifaddr
*)h
;
/* Stop recording once the fixed-size tables are full. */
368 if (i
>= ADDRSPERSITE
)
/* Usable payload MTU: interface MTU minus IP+UDP header overhead. */
372 rxmtu
= (ifnp
->if_mtu
- RX_IPUDP_SIZE
);
373 ifinaddr
= ntohl(iap
->ia_addr
.sin_addr
.s_addr
);
374 if (myNetAddrs
[i
] != ifinaddr
) {
375 myNetAddrs
[i
] = ifinaddr
;
376 myNetMTUs
[i
] = rxmtu
;
378 *(int *)arg1
= different
;
/* Size of a maximal fragmented datagram on this interface. */
380 rxmtu
= rxmtu
* rxi_nRecvFrags
+ ((rxi_nRecvFrags
- 1) * UDP_HDR_SIZE
);
381 if (!rx_IsLoopbackAddr(ifinaddr
) && (rxmtu
> rx_maxReceiveSize
)) {
382 rx_maxReceiveSize
= MIN(RX_MAX_PACKET_SIZE
, rxmtu
);
383 rx_maxReceiveSize
= MIN(rx_maxReceiveSize
, rx_maxReceiveSizeUser
);
/* Advance the table slot for the next interface. */
386 *(int *)arg2
= i
+ 1;
/*
 * rxi_GetIFInfo (body fragment): enumerate the in_ifaddr hash to refresh
 * the local-address tables via rxi_EnumGetIfInfo, then recompute
 * rx_maxJumboRecvSize from the jumbogram packet/header sizes, never below
 * rx_maxReceiveSize.  NOTE(review): the function header and local
 * declarations (`different`, `i`) are not visible in this extract.
 */
396 /* SGI 6.2 does not have a pointer from the ifnet to the list of
397 * of addresses (if_addrlist). So it's more efficient to run the
398 * in_ifaddr list and use the back pointers to the ifnet struct's.
400 (void)hash_enum(&hashinfo_inaddr
, rxi_EnumGetIfInfo
, HTF_INET
, NULL
,
401 (caddr_t
) & different
, (caddr_t
) & i
);
/* Largest jumbogram we can receive: header plus n jumbo buffers. */
403 rx_maxJumboRecvSize
=
404 RX_HEADER_SIZE
+ rxi_nDgramPackets
* RX_JUMBOBUFFERSIZE
+
405 (rxi_nDgramPackets
- 1) * RX_JUMBOHEADERSIZE
;
406 rx_maxJumboRecvSize
= MAX(rx_maxJumboRecvSize
, rx_maxReceiveSize
);
411 /* osi_NetSend - from the now defunct afs_osinet.c */
/*
 * osi_NetSend (SGI 6.5 variant) - send a datagram described by the iovec
 * array `dvec` (nvec entries) from `asocket` to `addr`; K&R-style
 * parameter declarations.  Builds a uio over a local copy of the iovecs,
 * wraps the destination address in an MT_SONAME mbuf, and calls sosend()
 * through an IRIX behavior handle.
 * NOTE(review): declarations of `tuio`, `bhv`, `to`, `code`, `i`, `iovp`
 * and the return path are missing from this extract; comments below cover
 * only the visible statements.
 */
420 osi_NetSend(asocket
, addr
, dvec
, nvec
, asize
, istack
)
425 struct sockaddr_in
*addr
;
429 struct iovec tvecs
[RX_MAXWVECS
+ 1];
/* Zero the uio and iovec scratch space before building the request. */
436 memset(&tuio
, 0, sizeof(tuio
));
437 memset(&tvecs
, 0, sizeof(tvecs
));
/* Refuse impossible iovec counts: panic rather than overrun tvecs. */
439 if (nvec
> RX_MAXWVECS
+ 1) {
440 osi_Panic("osi_NetSend: %d: Too many iovecs.\n", nvec
);
442 memcpy((char *)tvecs
, (char *)dvec
, nvec
* sizeof(struct iovec
));
444 tuio
.uio_iov
= tvecs
;
445 tuio
.uio_iovcnt
= nvec
;
446 tuio
.uio_segflg
= UIO_SYSSPACE
;
448 tuio
.uio_sigpipe
= 0;
/* Total residual is the sum of all iovec lengths. */
453 for (i
= 0, iovp
= tvecs
; i
< nvec
; i
++, iovp
++)
454 tuio
.uio_resid
+= iovp
->iov_len
;
/* Wrap the destination address in an MT_SONAME mbuf for sosend(). */
457 to
= m_get(M_WAIT
, MT_SONAME
);
458 to
->m_len
= sizeof(struct sockaddr_in
);
459 memcpy(mtod(to
, caddr_t
), (char *)addr
, to
->m_len
);
/* Point the behavior handle's private data at the socket for sosend(). */
461 BHV_PDATA(&bhv
) = (void *)asocket
;
462 code
= sosend(&bhv
, to
, &tuio
, 0, NULL
);
467 #else /* AFS_SGI65_ENV */
/*
 * dummy_sblock - placeholder socket-buffer lock routine installed before
 * afsd runs; panics with the message below if invoked before the real
 * routine is installed.  NOTE(review): the panic call's opening line is
 * missing from this extract; only the message argument is visible.
 */
470 dummy_sblock(struct sockbuf
*a
, int b
, struct socket
*c
, int *d
, int e
)
473 ("sblock was called before it was installed. Install proper afsd.\n");
/*
 * dummy_sbunlock - placeholder socket-buffer unlock routine installed
 * before afsd runs; panics with the message below if invoked too early.
 * NOTE(review): the panic call's opening line is missing from this
 * extract; only the message argument is visible.
 */
477 dummy_sbunlock(struct sockbuf
*a
, int b
, struct socket
*c
, int d
)
480 ("sbunlock was called before it was installed. Install proper afsd.\n");
/* Indirection for socket-buffer lock/unlock: afsd installs the real
 * routines at runtime; until then these presumably point at the dummy_*
 * panic stubs above (initializer lines are not visible in this extract). */
483 int (*afs_sblockp
) (struct sockbuf
*, int, struct socket
*, int *, int) =
485 void (*afs_sbunlockp
) (struct sockbuf
*, int, struct socket
*, int) =
/* Call through whichever unlock routine is currently installed. */
487 #define AFS_SBUNLOCK(SB, EV, SO, O) (*afs_sbunlockp)(SB, EV, SO, O)
489 /* osi_NetSend - send asize bytes at adata from asocket to host at addr.
491 * Now, why do we allocate a new buffer when we could theoretically use the one
492 * pointed to by adata? Because PRU_SEND returns after queueing the message,
493 * not after sending it. If the sender changes the data after queueing it,
494 * we'd see the already-queued data change. One attempt to fix this without
495 * adding a copy would be to have this function wait until the datagram is
496 * sent; however this doesn't work well. In particular, if a host is down, and
497 * an ARP fails to that host, this packet will be queued until the ARP request
498 * comes back, which could be hours later. We can't block in this routine that
499 * long, since it prevents RPC timeouts from happening.
501 /* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
502 * and just queue those. XXX
/*
 * osi_NetSend (pre-SGI 6.5 variant) - copy the caller's iovecs into a
 * fresh mbuf chain and queue it to UDP with PRU_SEND.  A copy is made
 * because PRU_SEND returns after queueing, not sending (see the block
 * comment above this function).  The socket send buffer is locked via
 * *afs_sblockp for the duration and released with AFS_SBUNLOCK on every
 * visible exit path.
 * NOTE(review): the copy loop's structure and several declarations
 * (tdata, tl, len, tpa, rlen, code, s1, i) are missing from this extract;
 * comments below cover only the visible statements.
 */
505 osi_NetSend(asocket
, addr
, dvec
, nvec
, asize
, istack
)
506 struct socket
*asocket
;
510 struct sockaddr_in
*addr
;
513 struct mbuf
*tm
, *um
;
516 struct mbuf
*top
= 0;
517 struct mbuf
*m
, **mp
;
524 AFS_STATCNT(osi_NetSend
);
/* Lock the socket send buffer for the duration of the send. */
526 (*afs_sblockp
) (&asocket
->so_snd
, NETEVENT_SODOWN
, asocket
, &s1
, istack
);
/* Start with the first source iovec. */
531 tdata
= dvec
[i
].iov_base
;
532 tl
= dvec
[i
].iov_len
;
/* Allocate an mbuf (up to VCL_MAX bytes) for the next chunk of data. */
534 if ((m
= m_vget(M_DONTWAIT
, MIN(asize
, VCL_MAX
), MT_DATA
)) == NULL
) {
/* Allocation failed: release the send-buffer lock before bailing out. */
538 AFS_SBUNLOCK(&asocket
->so_snd
, NETEVENT_SODOWN
, asocket
, s1
);
541 len
= MIN(m
->m_len
, asize
);
543 tpa
= mtod(m
, caddr_t
);
546 memcpy(tpa
, tdata
, rlen
);
556 /* shouldn't come here! */
557 asize
= 0; /* so we make progress toward completion */
/* Advance to the next source iovec. */
560 tdata
= dvec
[i
].iov_base
;
561 tl
= dvec
[i
].iov_len
;
573 /* setup mbuf corresponding to destination address */
574 um
= m_get(M_DONTWAIT
, MT_SONAME
);
/* Address-mbuf allocation failed: free the data chain and unlock. */
577 m_freem(top
); /* free mbuf chain */
578 /* if this were vfs40, we'd do sbunlock(asocket, &asocket->so_snd), but
579 * we don't do the locking at all for vfs40 systems */
581 AFS_SBUNLOCK(&asocket
->so_snd
, NETEVENT_SODOWN
, asocket
, s1
);
584 memcpy(mtod(um
, caddr_t
), addr
, sizeof(*addr
));
585 um
->m_len
= sizeof(*addr
);
586 /* note that udp_usrreq frees funny mbuf. We hold onto data, but mbuf
587 * around it is gone. we free address ourselves. */
/* Queue the datagram: data chain tm, address mbuf um. */
588 code
= (*asocket
->so_proto
->pr_usrreq
) (asocket
, PRU_SEND
, tm
, um
, 0);
/* Release the send-buffer lock now that the packet is queued. */
591 AFS_SBUNLOCK(&asocket
->so_snd
, NETEVENT_SODOWN
, asocket
, s1
);
595 #endif /* AFS_SGI65_ENV */