/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License. For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

#include <afsconfig.h>
#include "afs/param.h"


#include "rx/rx_kcommon.h"
#include "rx/rx_packet.h"
#include "h/tcp-param.h"
/* This must be loaded after proc.h to avoid macro collision with a variable */
#include "netinet/udp_var.h"




#ifdef RXK_LISTENER_ENV
/* osi_NetReceive
 * OS dependent part of kernel RX listener thread.
 *
 * Arguments:
 *  so      socket to receive on, typically rx_socket
 *  from    pointer to a sockaddr_in.
 *  iov     array of iovecs to fill in.
 *  iovcnt  how many iovecs there are.
 *  lengthp IN/OUT in: total space available in iovecs. out: size of read.
 *
 * Return
 *  0 if successful
 *  error code (such as EINTR) if not
 *
 * Environment
 *  Note that the maximum number of iovecs is 2 + RX_MAXWVECS. This is
 *  so we have a little space to look for packets larger than
 *  rx_maxReceiveSize.
 */
int rxk_lastSocketError = 0;
int rxk_nSocketErrors = 0;
int rxk_nSignalsCleared = 0;

int
osi_NetReceive(osi_socket so, struct sockaddr_in *addr, struct iovec *dvec,
               int nvecs, int *alength)
{
    struct uio tuio;
    int code;
    struct mbuf *maddr = NULL;
    struct sockaddr_in *taddr;
    struct iovec tmpvec[RX_MAXWVECS + 2];
#ifdef AFS_SGI65_ENV
    bhv_desc_t bhv;
    BHV_PDATA(&bhv) = (void *)so;
#endif

    memset(&tuio, 0, sizeof(tuio));
    memset(&tmpvec, 0, sizeof(tmpvec));

    tuio.uio_iov = tmpvec;
    tuio.uio_iovcnt = nvecs;
    tuio.uio_offset = 0;
    tuio.uio_segflg = AFS_UIOSYS;
    tuio.uio_fmode = 0;
    tuio.uio_resid = *alength;
    tuio.uio_pio = 0;
    tuio.uio_pbuf = 0;

    if (nvecs > RX_MAXWVECS + 2) {
        osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", nvecs);
    }
    memcpy(tmpvec, (char *)dvec, (RX_MAXWVECS + 1) * sizeof(struct iovec));
#ifdef AFS_SGI65_ENV
    code = soreceive(&bhv, &maddr, &tuio, NULL, NULL);
#else
    code = soreceive(so, &maddr, &tuio, NULL, NULL);
#endif

    if (code) {
#ifdef AFS_SGI65_ENV
        /* Clear the error before using the socket again. I've tried being nice
         * and blocking SIGKILL and SIGSTOP from the kernel, but they get
         * delivered anyway. So, time to be crude and just clear the signals
         * pending on this thread.
         */
        if (code == EINTR) {
            uthread_t *ut = curuthread;
            int s;
            s = ut_lock(ut);
            sigemptyset(&ut->ut_sig);
            ut->ut_cursig = 0;
            thread_interrupt_clear(UT_TO_KT(ut), 1);
            ut_unlock(ut, s);
            rxk_nSignalsCleared++;
        }
#endif
        /* Clear the error before using the socket again. */
        so->so_error = 0;
        rxk_lastSocketError = code;
        rxk_nSocketErrors++;
        if (maddr)
            m_freem(maddr);
    } else {
        *alength = *alength - tuio.uio_resid;
        if (maddr) {
            memcpy((char *)addr, (char *)mtod(maddr, struct sockaddr_in *),
                   sizeof(struct sockaddr_in));
            m_freem(maddr);
        } else {
            return -1;
        }
    }
    return code;
}
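
/*
 * Illustrative sketch only (not compiled in): this is roughly how the kernel
 * RX listener thread drives osi_NetReceive.  The iovec array and variable
 * names below are hypothetical; in the real listener they come from an
 * rx_packet's buffer vector.
 *
 *     struct sockaddr_in from;
 *     struct iovec iov[RX_MAXWVECS + 2];
 *     int nvecs;                  // number of iovecs actually filled in
 *     int length;                 // in: space available; out: bytes read
 *     ...
 *     int code = osi_NetReceive(rx_socket, &from, iov, nvecs, &length);
 *     if (code == 0) {
 *         // 'length' bytes arrived from 'from'; hand the packet to RX
 *     } else if (code == EINTR) {
 *         // pending signals were cleared above; the caller typically retries
 *     }
 */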
#else /* RXK_LISTENER_ENV */

static struct protosw parent_proto;  /* udp proto switch */

/*
 * RX input, fast timer and initialization routines.
 */

#ifdef AFS_SGI64_ENV
static void
rxk_input(struct mbuf *am, struct ifnet *aif, struct ipsec *spec)
#else
static void
rxk_input(struct mbuf *am, struct ifnet *aif)
#endif
{
    void (*tproc) ();
    unsigned short *tsp;
    int hdr;
    struct udphdr *tu;
    struct ip *ti;
    struct udpiphdr *tvu;
    int i;
    char *phandle;
    struct sockaddr_in taddr;
    int tlen;
    short port;
    int data_len;

    /* make sure we have base ip and udp headers in first mbuf */
    if (am->m_off > MMAXOFF || am->m_len < 28) {
        am = m_pullup(am, 28);
        if (!am)
            return;
    }

    hdr = (mtod(am, struct ip *))->ip_hl;
    if (hdr > 5) {
        /* pull up more, the IP hdr is bigger than usual */
        if (am->m_len < (8 + (hdr << 2))) {
            am = m_pullup(am, 8 + (hdr << 2));
            if (!am)
                return;
        }
        ti = mtod(am, struct ip *);  /* recompute, since m_pullup allocates new mbuf */
        tu = (struct udphdr *)(((char *)ti) + (hdr << 2));  /* skip ip hdr */
    } else {
        ti = mtod(am, struct ip *);
        tu = (struct udphdr *)(((char *)ti) + 20);  /* skip basic ip hdr */
    }
    /* now read the port out */
    port = tu->uh_dport;

    if (port) {
        for (tsp = rxk_ports, i = 0; i < MAXRXPORTS; i++) {
            if (*tsp++ == port) {
                /* checksum the packet */
                if (hdr > 5) {
                    ip_stripoptions(am, (struct mbuf *)0);  /* get rid of anything we don't need */
                    tu = (struct udphdr *)(((char *)ti) + 20);
                }
                /*
                 * Make mbuf data length reflect UDP length.
                 * If not enough data to reflect UDP length, drop.
                 */
                tvu = (struct udpiphdr *)ti;
                tlen = ntohs((u_short) tvu->ui_ulen);
                if ((int)ti->ip_len != tlen) {
                    if (tlen > (int)ti->ip_len) {
                        m_free(am);
                        return;
                    }
                    m_adj(am, tlen - (int)ti->ip_len);
                }
                /* deliver packet to rx */
                taddr.sin_family = AF_INET;  /* compute source address */
                taddr.sin_port = tu->uh_sport;
                taddr.sin_addr.s_addr = ti->ip_src.s_addr;
                /* handle the checksum. Note that this code damages the actual ip
                 * header (replacing it with the virtual one, which is the same size),
                 * so we must ensure we get everything out we need, first */
                if (tu->uh_sum != 0) {
                    /* if the checksum is there, always check it. It's crazy not
                     * to, unless you can really be sure that your
                     * underlying network (and interfaces and drivers and
                     * DMA hardware, etc!) is error-free. First, fill
                     * in entire virtual ip header. */
                    tvu->ui_next = 0;
                    tvu->ui_prev = 0;
                    tvu->ui_x1 = 0;
                    tvu->ui_len = tvu->ui_ulen;
                    tlen = ntohs((unsigned short)(tvu->ui_ulen));
                    if ((!(am->m_flags & M_CKSUMMED))
                        && in_cksum(am, sizeof(struct ip) + tlen)) {
                        /* checksum, including cksum field, doesn't come out 0, so
                         * this packet is bad */
                        m_freem(am);
                        return;
                    }
                }

                /*
                 * 28 is IP (20) + UDP (8) header. ulen includes
                 * udp header, and we *don't* tell RX about udp
                 * header either. So, we remove those 8 as well.
                 */
                data_len = ntohs(tu->uh_ulen);
                data_len -= 8;
                if (!(*rxk_GetPacketProc) (&phandle, data_len)) {
                    if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
                        /* XXX should just increment counter here.. */
                        printf("rx: truncated UDP packet\n");
                        rxi_FreePacket(phandle);
                    } else
                        (*rxk_PacketArrivalProc) (phandle, &taddr,
                                                  rxk_portRocks[i], data_len);
                } else
                    m_freem(am);
                return;
            }
        }
    }

    /* if we get here, try to deliver packet to udp */
    if (tproc = parent_proto.pr_input)
        (*tproc) (am, aif);
    return;
}

/*
 * UDP fast timer to raise events for all but Solaris and NCR.
 * Called about 5 times per second (at unknown priority?). Must go to
 * splnet or obtain global lock before touching anything significant.
 */
static void
rxk_fasttimo(void)
{
    int (*tproc) ();
    struct clock temp;

    /* do rx fasttimo processing here */
    rxevent_RaiseEvents(&temp);
    if (tproc = parent_proto.pr_fasttimo)
        (*tproc) ();
}


/* start intercepting basic calls */
void
rxk_init(void)
{
    struct protosw *tpro, *last;
    if (rxk_initDone)
        return;

    last = inetdomain.dom_protoswNPROTOSW;
    for (tpro = inetdomain.dom_protosw; tpro < last; tpro++) {
        if (tpro->pr_protocol == IPPROTO_UDP) {
            memcpy(&parent_proto, tpro, sizeof(parent_proto));
            tpro->pr_input = rxk_input;
            tpro->pr_fasttimo = rxk_fasttimo;
            rxk_initDone = 1;
            return;
        }
    }
    osi_Panic("inet:no udp");
}
#endif /* RXK_LISTENER_ENV */

/*
 * RX IP address routines.
 */

static afs_uint32 myNetAddrs[ADDRSPERSITE];
static int myNetMTUs[ADDRSPERSITE];
static int myNetFlags[ADDRSPERSITE];
static int numMyNetAddrs = 0;

/* This version doesn't even begin to handle iterative requests, but then
 * we don't yet use them anyway. Fix this when rxi_InitPeerParams is changed
 * to find a true maximum.
 */
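/* rxi_MatchIfnet is the hash_enum callback used by rxi_FindIfnet below.
 * arg1 points at the best match level found so far and arg2 at the best
 * in_ifaddr: 2 means the key matched only the interface's network,
 * 3 means it also matched the subnet, and 4 means it is the interface's
 * own address.  The callback always returns 0 so the enumeration visits
 * every bucket.
 */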
static int
rxi_MatchIfnet(struct hashbucket *h, caddr_t key, caddr_t arg1, caddr_t arg2)
{
    afs_uint32 ppaddr = *(afs_uint32 *) key;
    int match_value = *(int *)arg1;
    struct in_ifaddr *ifa = (struct in_ifaddr *)h;
    struct sockaddr_in *sin;

    if ((ppaddr & ifa->ia_netmask) == ifa->ia_net) {
        if ((ppaddr & ifa->ia_subnetmask) == ifa->ia_subnet) {
            sin = IA_SIN(ifa);
            if (sin->sin_addr.s_addr == ppaddr) {  /* ie, ME!!! */
                match_value = 4;
                *(struct in_ifaddr **)arg2 = ifa;
            }
            if (match_value < 3) {
                *(struct in_ifaddr **)arg2 = ifa;
                match_value = 3;
            }
        } else {
            if (match_value < 2) {
                *(struct in_ifaddr **)arg2 = ifa;
                match_value = 2;
            }
        }
    }
    *(int *)arg1 = match_value;
    return 0;
}


struct ifnet *
rxi_FindIfnet(afs_uint32 addr, afs_uint32 * maskp)
{
    afs_uint32 ppaddr;
    int match_value = 0;
    struct in_ifaddr *ifad;

    if (numMyNetAddrs == 0)
        (void)rxi_GetIFInfo();

    ppaddr = ntohl(addr);
    ifad = (struct in_ifaddr *)&hashinfo_inaddr;

    (void)hash_enum(&hashinfo_inaddr, rxi_MatchIfnet, HTF_INET,
                    (caddr_t) & ppaddr, (caddr_t) & match_value,
                    (caddr_t) & ifad);

    if (match_value) {
        if (maskp)
            *maskp = ifad->ia_subnetmask;
        return ifad->ia_ifp;
    } else
        return NULL;
}

static int
rxi_EnumGetIfInfo(struct hashbucket *h, caddr_t key, caddr_t arg1,
                  caddr_t arg2)
{
    int different = *(int *)arg1;
    int i = *(int *)arg2;
    struct in_ifaddr *iap = (struct in_ifaddr *)h;
    struct ifnet *ifnp;
    afs_uint32 ifinaddr;
    afs_uint32 rxmtu;

    if (i >= ADDRSPERSITE)
        return 0;

    ifnp = iap->ia_ifp;
    rxmtu = (ifnp->if_mtu - RX_IPUDP_SIZE);
    ifinaddr = ntohl(iap->ia_addr.sin_addr.s_addr);
    if (myNetAddrs[i] != ifinaddr) {
        myNetAddrs[i] = ifinaddr;
        myNetMTUs[i] = rxmtu;
        different++;
        *(int *)arg1 = different;
    }
    rxmtu = rxmtu * rxi_nRecvFrags + ((rxi_nRecvFrags - 1) * UDP_HDR_SIZE);
    if (!rx_IsLoopbackAddr(ifinaddr) && (rxmtu > rx_maxReceiveSize)) {
        rx_maxReceiveSize = MIN(RX_MAX_PACKET_SIZE, rxmtu);
        rx_maxReceiveSize = MIN(rx_maxReceiveSize, rx_maxReceiveSizeUser);
    }

    *(int *)arg2 = i + 1;
    return 0;
}

int
rxi_GetIFInfo()
{
    int i = 0;
    int different = 0;

    /* SGI 6.2 does not have a pointer from the ifnet to the list of
     * addresses (if_addrlist), so it's more efficient to run the
     * in_ifaddr list and use the back pointers to the ifnet structs.
     */
    (void)hash_enum(&hashinfo_inaddr, rxi_EnumGetIfInfo, HTF_INET, NULL,
                    (caddr_t) & different, (caddr_t) & i);

    rx_maxJumboRecvSize =
        RX_HEADER_SIZE + rxi_nDgramPackets * RX_JUMBOBUFFERSIZE +
        (rxi_nDgramPackets - 1) * RX_JUMBOHEADERSIZE;
    rx_maxJumboRecvSize = MAX(rx_maxJumboRecvSize, rx_maxReceiveSize);

    return different;
}
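
/*
 * Worked example of the interface sizing math in rxi_EnumGetIfInfo() above
 * (illustrative only, assuming a plain Ethernet interface): with
 * if_mtu = 1500, each fragment can carry
 * rxmtu = 1500 - RX_IPUDP_SIZE (the 28-byte IP + UDP header) = 1472 bytes,
 * so the candidate receive size considered is
 * 1472 * rxi_nRecvFrags + (rxi_nRecvFrags - 1) * UDP_HDR_SIZE, which is
 * then capped at RX_MAX_PACKET_SIZE and at rx_maxReceiveSizeUser.
 */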

/* osi_NetSend - from the now defunct afs_osinet.c */
#ifdef DEBUG
#undef DEBUG
#endif
#ifdef MP
#define _MP_NETLOCKS
#endif

#ifdef AFS_SGI65_ENV
int
osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
     osi_socket *asocket;
     struct iovec *dvec;
     int nvec;
     afs_int32 asize;
     struct sockaddr_in *addr;
     int istack;
{
    int code;
    struct iovec tvecs[RX_MAXWVECS + 1];
    struct iovec *iovp;
    struct uio tuio;
    struct mbuf *to;
    int i;
    bhv_desc_t bhv;

    memset(&tuio, 0, sizeof(tuio));
    memset(&tvecs, 0, sizeof(tvecs));

    if (nvec > RX_MAXWVECS + 1) {
        osi_Panic("osi_NetSend: %d: Too many iovecs.\n", nvec);
    }
    memcpy((char *)tvecs, (char *)dvec, nvec * sizeof(struct iovec));

    tuio.uio_iov = tvecs;
    tuio.uio_iovcnt = nvec;
    tuio.uio_segflg = UIO_SYSSPACE;
    tuio.uio_offset = 0;
    tuio.uio_sigpipe = 0;
    tuio.uio_pio = 0;
    tuio.uio_pbuf = 0;

    tuio.uio_resid = 0;
    for (i = 0, iovp = tvecs; i < nvec; i++, iovp++)
        tuio.uio_resid += iovp->iov_len;


    to = m_get(M_WAIT, MT_SONAME);
    to->m_len = sizeof(struct sockaddr_in);
    memcpy(mtod(to, caddr_t), (char *)addr, to->m_len);

    BHV_PDATA(&bhv) = (void *)asocket;
    code = sosend(&bhv, to, &tuio, 0, NULL);

    m_freem(to);
    return code;
}
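
/*
 * Illustrative sketch only (not compiled in): a caller of the AFS_SGI65_ENV
 * osi_NetSend above describes an already-built packet with an iovec array
 * and passes the destination separately.  The variable names below are
 * hypothetical:
 *
 *     struct sockaddr_in dest;        // filled in with the peer's address
 *     struct iovec iov[RX_MAXWVECS + 1];
 *     int nvec, length;               // iovec count and total byte count
 *     int istack = 0;
 *     ...
 *     code = osi_NetSend(rx_socket, &dest, iov, nvec, length, istack);
 *
 * Because sosend() copies the data out of the iovecs into mbufs before it
 * returns, the caller's buffers can be reused as soon as osi_NetSend comes
 * back.
 */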
#else /* AFS_SGI65_ENV */

int
dummy_sblock(struct sockbuf *a, int b, struct socket *c, int *d, int e)
{
    afs_warn
        ("sblock was called before it was installed. Install proper afsd.\n");
    return 0;
}

void
dummy_sbunlock(struct sockbuf *a, int b, struct socket *c, int d)
{
    afs_warn
        ("sbunlock was called before it was installed. Install proper afsd.\n");
}

int (*afs_sblockp) (struct sockbuf *, int, struct socket *, int *, int) =
    dummy_sblock;
void (*afs_sbunlockp) (struct sockbuf *, int, struct socket *, int) =
    dummy_sbunlock;
#define AFS_SBUNLOCK(SB, EV, SO, O) (*afs_sbunlockp)(SB, EV, SO, O)

/* osi_NetSend - send asize bytes from the iovecs in dvec over asocket to the
 * host at addr.
 *
 * Now, why do we allocate new buffers when we could theoretically use the
 * ones dvec points to? Because PRU_SEND returns after queueing the message,
 * not after sending it. If the sender changes the data after queueing it,
 * we'd see the already-queued data change. One attempt to fix this without
 * adding a copy would be to have this function wait until the datagram is
 * sent; however this doesn't work well. In particular, if a host is down, and
 * an ARP to that host fails, this packet will be queued until the ARP reply
 * comes back, which could be hours later. We can't block in this routine that
 * long, since doing so prevents RPC timeouts from happening.
 */
/* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
 * and just queue those. XXX
 */
int
osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
     struct socket *asocket;
     struct iovec *dvec;
     int nvec;
     afs_int32 asize;
     struct sockaddr_in *addr;
     int istack;
{
    struct mbuf *tm, *um;
    afs_int32 code;
    int s;
    struct mbuf *top = 0;
    struct mbuf *m, **mp;
    int len;
    char *tdata;
    caddr_t tpa;
    int i, tl, rlen;

    NETSPL_DECL(s1)
    AFS_STATCNT(osi_NetSend);

    (*afs_sblockp) (&asocket->so_snd, NETEVENT_SODOWN, asocket, &s1, istack);

    s = splnet();
    mp = &top;
    i = 0;
    tdata = dvec[i].iov_base;
    tl = dvec[i].iov_len;
    while (1) {
        if ((m = m_vget(M_DONTWAIT, MIN(asize, VCL_MAX), MT_DATA)) == NULL) {
            if (top)
                m_freem(top);
            splx(s);
            AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
            return 1;
        }
        len = MIN(m->m_len, asize);
        m->m_len = 0;
        tpa = mtod(m, caddr_t);
        while (len) {
            rlen = MIN(len, tl);
            memcpy(tpa, tdata, rlen);
            asize -= rlen;
            len -= rlen;
            tpa += rlen;
            m->m_len += rlen;
            tdata += rlen;
            tl -= rlen;
            if (tl <= 0) {
                i++;
                if (i > nvec) {
                    /* shouldn't come here! */
                    asize = 0;  /* so we make progress toward completion */
                    break;
                }
                tdata = dvec[i].iov_base;
                tl = dvec[i].iov_len;
            }
        }
        *mp = m;
        mp = &m->m_next;
        if (asize <= 0)
            break;
    }
    tm = top;

    tm->m_act = NULL;

    /* setup mbuf corresponding to destination address */
    um = m_get(M_DONTWAIT, MT_SONAME);
    if (!um) {
        if (top)
            m_freem(top);  /* free mbuf chain */
        /* if this were vfs40, we'd do sbunlock(asocket, &asocket->so_snd), but
         * we don't do the locking at all for vfs40 systems */
        splx(s);
        AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
        return 1;
    }
    memcpy(mtod(um, caddr_t), addr, sizeof(*addr));
    um->m_len = sizeof(*addr);
    /* note that udp_usrreq frees funny mbuf. We hold onto data, but mbuf
     * around it is gone. we free address ourselves. */
    code = (*asocket->so_proto->pr_usrreq) (asocket, PRU_SEND, tm, um, 0);
    splx(s);
    m_free(um);
    AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);

    return code;
}
#endif /* AFS_SGI65_ENV */