src/rx/FBSD/rx_knet.c
/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License. For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

#include <afsconfig.h>
#include "afs/param.h"


#include <sys/malloc.h>
#include "rx/rx_kcommon.h"

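/*
 * Two configurations are provided here. With RXK_LISTENER_ENV, a dedicated
 * listener thread blocks in soreceive() and transmits with sosend(). The
 * code after the #else instead hooks the UDP entry of the inet protocol
 * switch (see rxk_init) and, by its own admission, only "almost" works.
 */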
#ifdef RXK_LISTENER_ENV
int
osi_NetReceive(osi_socket asocket, struct sockaddr_in *addr,
               struct iovec *dvec, int nvecs, int *alength)
{
    struct uio u;
    int i;
    struct iovec iov[RX_MAXIOVECS];
    struct sockaddr *sa = NULL;
    int code;

    int haveGlock = ISAFS_GLOCK();

    memset(&u, 0, sizeof(u));
    memset(&iov, 0, sizeof(iov));

    /*AFS_STATCNT(osi_NetReceive); */

    if (nvecs > RX_MAXIOVECS)
        osi_Panic("osi_NetReceive: %d: Too many iovecs.\n", nvecs);

    for (i = 0; i < nvecs; i++)
        iov[i] = dvec[i];

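    /*
     * Describe the caller's scatter list to soreceive() with a uio in
     * system space. uio_resid starts at *alength (the most we will accept)
     * and is decremented as data is copied in, so the number of bytes
     * actually received is *alength - uio_resid (computed below).
     */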
    u.uio_iov = &iov[0];
    u.uio_iovcnt = nvecs;
    u.uio_offset = 0;
    u.uio_resid = *alength;
    u.uio_segflg = UIO_SYSSPACE;
    u.uio_rw = UIO_READ;
    u.uio_td = NULL;

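    /*
     * Drop the AFS global lock while blocked in soreceive(); the listener
     * can sleep here indefinitely waiting for traffic. soreceive()
     * allocates the source address (sa), which must be freed below.
     */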
    if (haveGlock)
        AFS_GUNLOCK();
    code = soreceive(asocket, &sa, &u, NULL, NULL, NULL);
    if (haveGlock)
        AFS_GLOCK();

    if (code) {
#if KNET_DEBUG
        if (code == EINVAL)
            Debugger("afs NetReceive busted");
        else
            printf("y");
#else
        return code;
#endif
    }
    *alength -= u.uio_resid;
    if (sa) {
        if (sa->sa_family == AF_INET) {
            if (addr)
                *addr = *(struct sockaddr_in *)sa;
        } else
            printf("Unknown socket family %d in NetReceive\n", sa->sa_family);
        FREE(sa, M_SONAME);
    }
    return code;
}

extern int rxk_ListenerPid;
void
osi_StopListener(void)
{
    struct sockaddr_in taddr;
    struct iovec dvec;
    struct proc *p;
    char c;
    c = '\0';

    /*
     * Have to drop global lock to safely do this.
     * soclose() is currently protected by Giant,
     * but pfind and psignal are MPSAFE.
     */
    int haveGlock = ISAFS_GLOCK();
    if (haveGlock)
        AFS_GUNLOCK();
    soshutdown(rx_socket, SHUT_RDWR);
    p = pfind(rxk_ListenerPid);
    if (p) {
        afs_warn("osi_StopListener: rxk_ListenerPid %u\n", rxk_ListenerPid);
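        /* FreeBSD 9 (__FreeBSD_version 900044) renamed the in-kernel
         * psignal() to kern_psignal(); pfind() returns the proc locked,
         * hence the PROC_UNLOCK() below. */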
#if (__FreeBSD_version >= 900044)
        kern_psignal(p, SIGUSR1);
#else
        psignal(p, SIGUSR1);
#endif
        PROC_UNLOCK(p);
    } else
        afs_warn("osi_StopListener: rxk_Listener not found (pid %u)\n",
                 rxk_ListenerPid);

    /* Avoid destroying socket until osi_NetReceive has
     * had a chance to clean up. Otherwise we can't restart. */
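    /*
     * Wake the listener out of soreceive() by sending a one-byte datagram
     * to rx_port on the loopback address, then sleep on &rxk_ListenerPid;
     * the listener thread is expected to clear rxk_ListenerPid and issue
     * the matching afs_osi_Wakeup() as it exits, which ends this loop.
     */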
    bzero(&taddr, sizeof(taddr));
    taddr.sin_len = sizeof(struct sockaddr_in);
    taddr.sin_family = AF_INET;
    taddr.sin_port = rx_port;
    taddr.sin_addr.s_addr = htonl(0x7f000001);  /* no place like localhost */
    bzero(&dvec, sizeof(dvec));
    dvec.iov_base = &c;
    dvec.iov_len = 1;
    /* afs_osi_Sleep requires the GLOCK */
    AFS_GLOCK();
    while (rxk_ListenerPid) {
        afs_warn("waiting for rxk_ListenerPid to die\n");
        osi_NetSend(rx_socket, &taddr, &dvec, 1, 1, 0);
        afs_osi_Sleep(&rxk_ListenerPid);
    }
    AFS_GUNLOCK();
    /* in theory, we are now the only people doing anything with rx_socket */
    soclose(rx_socket);

    if (haveGlock)
        AFS_GLOCK();
}

int
osi_NetSend(osi_socket asocket, struct sockaddr_in *addr, struct iovec *dvec,
            int nvecs, afs_int32 alength, int istack)
{
    afs_int32 code;
    int i;
    struct iovec iov[RX_MAXIOVECS];
    struct uio u;
    int haveGlock = ISAFS_GLOCK();

    memset(&u, 0, sizeof(u));
    memset(&iov, 0, sizeof(iov));

    AFS_STATCNT(osi_NetSend);
    if (nvecs > RX_MAXIOVECS)
        osi_Panic("osi_NetSend: %d: Too many iovecs.\n", nvecs);

    for (i = 0; i < nvecs; i++)
        iov[i] = dvec[i];

    u.uio_iov = &iov[0];
    u.uio_iovcnt = nvecs;
    u.uio_offset = 0;
    u.uio_resid = alength;
    u.uio_segflg = UIO_SYSSPACE;
    u.uio_rw = UIO_WRITE;
    u.uio_td = NULL;

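    /*
     * BSD sockaddrs carry an explicit length; fill in sin_len before
     * handing the address to sosend(), since the protocol layer checks it.
     */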
    addr->sin_len = sizeof(struct sockaddr_in);

    if (haveGlock)
        AFS_GUNLOCK();
#if KNET_DEBUG
    printf("+");
#endif
    code =
        sosend(asocket, (struct sockaddr *)addr, &u, NULL, NULL, 0,
               curthread);
#if KNET_DEBUG
    if (code) {
        if (code == EINVAL)
            Debugger("afs NetSend busted");
        else
            printf("z");
    }
#endif
    if (haveGlock)
        AFS_GLOCK();
    return code;
}
#else
/* This code *almost* works :( */
static struct protosw parent_proto;     /* udp proto switch */
static void rxk_input(struct mbuf *am, int iphlen);
static void rxk_fasttimo(void);

/* start intercepting basic calls */
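/*
 * rxk_init() walks the inet domain's protocol switch, finds the UDP entry,
 * saves a copy in parent_proto, and replaces its pr_input and pr_fasttimo
 * hooks with ours, so Rx sees UDP traffic before the regular stack does.
 */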
int
rxk_init(void)
{
    struct protosw *tpro, *last;
    if (rxk_initDone)
        return 0;

    last = inetdomain.dom_protoswNPROTOSW;
    for (tpro = inetdomain.dom_protosw; tpro < last; tpro++)
        if (tpro->pr_protocol == IPPROTO_UDP) {
#if 0 /* not exported */
            /* force UDP checksumming on for AFS */
            extern int udpcksum;
            udpcksum = 1;
#endif
            memcpy(&parent_proto, tpro, sizeof(parent_proto));
            tpro->pr_input = rxk_input;
            tpro->pr_fasttimo = rxk_fasttimo;
            /*
             * don't bother with pr_drain and pr_ctlinput
             * until we have something to do
             */
            rxk_initDone = 1;
            return 0;
        }
    osi_Panic("inet:no udp");
}

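/*
 * rxk_input() is called by the IP layer for every inbound UDP datagram.
 * If the destination port matches an entry in rxk_ports[], the packet is
 * checksummed and handed to Rx via rxk_GetPacketProc/rxk_PacketArrivalProc;
 * otherwise it is passed on to the saved UDP pr_input in parent_proto.
 */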
static void
rxk_input(struct mbuf *am, int iphlen)
{
    void (*tproc) ();
    unsigned short *tsp;
    int hdr;
    struct udphdr *tu;
    struct ip *ti;
    struct udpiphdr *tvu;
    int i;
    char *phandle;
    afs_int32 code;
    struct sockaddr_in taddr;
    int tlen;
    short port;
    int data_len, comp_sum;

    SPLVAR;
    NETPRI;

    /* make sure we have base ip and udp headers in first mbuf */
    if (iphlen > sizeof(struct ip)) {
        ip_stripoptions(am, NULL);
        iphlen = sizeof(struct ip);
    }

    if (am->m_len < sizeof(struct udpiphdr)) {
        am = m_pullup(am, sizeof(struct udpiphdr));
        if (!am) {
            USERPRI;
            return;
        }
    }

    ti = mtod(am, struct ip *);
    /* skip basic ip hdr */
    tu = (struct udphdr *)(((char *)ti) + sizeof(struct ip));

    /* now read the port out */
    port = tu->uh_dport;

    if (port) {
        for (tsp = rxk_ports, i = 0; i < MAXRXPORTS; i++) {
            if (*tsp++ == port) {
                /* checksum the packet */
                /*
                 * Make mbuf data length reflect UDP length.
                 * If not enough data to reflect UDP length, drop.
                 */
                tvu = (struct udpiphdr *)ti;
                tlen = ntohs((u_short) tvu->ui_ulen);
                if ((int)ti->ip_len != tlen) {
                    if (tlen > (int)ti->ip_len) {
                        m_free(am);
                        USERPRI;
                        return;
                    }
                    m_adj(am, tlen - (int)ti->ip_len);
                }
                /* deliver packet to rx */
                taddr.sin_family = AF_INET;     /* compute source address */
                taddr.sin_port = tu->uh_sport;
                taddr.sin_addr.s_addr = ti->ip_src.s_addr;
                taddr.sin_len = sizeof(taddr);
                tvu = (struct udpiphdr *)ti;    /* virtual udp structure, for cksum */
                /* handle the checksum. Note that this code damages the actual ip
                 * header (replacing it with the virtual one, which is the same size),
                 * so we must ensure we get everything out we need, first */
                if (tu->uh_sum != 0) {
                    /* if the checksum is there, always check it. It's crazy not
                     * to, unless you can really be sure that your
                     * underlying network (and interfaces and drivers and
                     * DMA hardware, etc!) is error-free. First, fill
                     * in entire virtual ip header. */
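                    /* ih_x1 is the unused filler at the front of the
                     * overlaid ip header (struct ipovly); zeroing it and
                     * copying the UDP length into ui_len turns the header
                     * into the UDP pseudo-header that in_cksum() expects. */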
                    memset(tvu->ui_i.ih_x1, 0, 9);
                    tvu->ui_len = tvu->ui_ulen;
                    tlen = ntohs((unsigned short)(tvu->ui_ulen));
                    if (in_cksum(am, sizeof(struct ip) + tlen)) {
                        /* checksum, including cksum field, doesn't come out 0, so
                         * this packet is bad */
                        m_freem(am);
                        USERPRI;
                        return;
                    }
                }

                /*
                 * 28 is IP (20) + UDP (8) header. ulen includes
                 * udp header, and we *don't* tell RX about udp
                 * header either. So, we remove those 8 as well.
                 */
                data_len = ntohs(tu->uh_ulen);
                data_len -= 8;
                if (!(*rxk_GetPacketProc) (&phandle, data_len)) {
                    if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
                        /* XXX should just increment counter here.. */
                        printf("rx: truncated UDP packet\n");
                        rxi_FreePacket(phandle);
                    } else
                        (*rxk_PacketArrivalProc) (phandle, &taddr,
                                                  rxk_portRocks[i], data_len);
                } else
                    m_freem(am);
                USERPRI;
                return;
            }
        }
    }

    /* if we get here, try to deliver packet to udp */
    if ((tproc = parent_proto.pr_input) != NULL)
        (*tproc) (am, iphlen);
    USERPRI;
    return;
}


/*
 * UDP fast timer to raise events for all but Solaris and NCR.
 * Called about 5 times per second (at unknown priority?). Must go to
 * splnet or obtain global lock before touching anything significant.
 */
static void
rxk_fasttimo(void)
{
    void (*tproc) ();
    struct clock temp;

    /* do rx fasttimo processing here */
    rxevent_RaiseEvents(&temp);
    if ((tproc = parent_proto.pr_fasttimo) != NULL)
        (*tproc) ();
}

/* rx_NetSend - send asize bytes at adata from asocket to host at addr.
 *
 * Now, why do we allocate a new buffer when we could theoretically use the one
 * pointed to by adata? Because PRU_SEND returns after queueing the message,
 * not after sending it. If the sender changes the data after queueing it,
 * we'd see the already-queued data change. One attempt to fix this without
 * adding a copy would be to have this function wait until the datagram is
 * sent; however this doesn't work well. In particular, if a host is down, and
 * an ARP fails to that host, this packet will be queued until the ARP request
 * comes back, which could be hours later. We can't block in this routine that
 * long, since it prevents RPC timeouts from happening.
 */
/* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
 * and just queue those. XXX
 */

/* set lock on sockbuf sb; can't call sblock since we're at interrupt level
 * sometimes */
static int
trysblock(struct sockbuf *sb)
{
    AFS_STATCNT(trysblock);
    if (sb->sb_flags & SB_LOCK) {
        return -1;              /* can't lock socket */
    }
    sb->sb_flags |= SB_LOCK;
    return 0;
}

/* We only have to do all the mbuf management ourselves if we can be called at
   interrupt time. In RXK_LISTENER_ENV, we can just call sosend() */
int
osi_NetSend(osi_socket asocket, struct sockaddr_in *addr, struct iovec *dvec,
            int nvec, afs_int32 asize, int istack)
{
    struct mbuf *tm, *um;
    afs_int32 code;
    int s;
    struct mbuf *top = 0;
    struct mbuf *m, **mp;
    int len;
    char *tdata;
    caddr_t tpa;
    int i, tl, rlen;
    int mlen;
    int haveGlock;
#if KNET_DEBUG
    static int before = 0;
#endif

    AFS_STATCNT(osi_NetSend);
    /* Actually, the Ultrix way is as good as any for us, so we don't bother with
     * special mbufs any more. Used to think we could get away with not copying
     * the data to the interface, but there's no way to tell the caller not to
     * reuse the buffers after sending, so we lost out on that trick anyway */
    s = splnet();
    if (trysblock(&asocket->so_snd)) {
        splx(s);
        return 1;
    }
    mp = &top;
    i = 0;
    tdata = dvec[i].iov_base;
    tl = dvec[i].iov_len;
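    /*
     * Copy the caller's iovecs into a freshly allocated mbuf chain: a
     * packet-header mbuf first, then ordinary mbufs or clusters as needed.
     * The copy is unavoidable here because pru_send() only queues the data;
     * see the rx_NetSend comment above.
     */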
    while (1) {
        mlen = MLEN;
        if (top == 0) {
            MGETHDR(m, M_DONTWAIT, MT_DATA);
            if (!m) {
                sbunlock(&asocket->so_snd);
                splx(s);
                return 1;
            }
            mlen = MHLEN;
            m->m_pkthdr.len = 0;
            m->m_pkthdr.rcvif = NULL;
        } else
            MGET(m, M_DONTWAIT, MT_DATA);
        if (!m) {
            /* can't get an mbuf, give up */
            if (top)
                m_freem(top);   /* free mbuf list we're building */
            sbunlock(&asocket->so_snd);
            splx(s);
            return 1;
        }
        /*
         * WARNING: the `4 * MLEN' is somewhat dubious. It is better than
         * `NBPG', which may have no relation to `CLBYTES'. Also, `CLBYTES'
         * may be so large that we never use clusters, resulting in far
         * too many mbufs being used. It is often better to briefly use
         * a cluster, even if we are only using a portion of it. Since
         * we are on the xmit side, it shouldn't end up sitting on a queue
         * for a potentially unbounded time (except perhaps if we are talking
         * to ourself).
         */
        if (asize >= 4 * MLEN) {        /* try to get cluster mbuf */
            /* different algorithms for getting cluster mbuf */
            MCLGET(m, M_DONTWAIT);
            if ((m->m_flags & M_EXT) == 0)
                goto nopages;
            mlen = MCLBYTES;

            /* now compute usable size */
            len = MIN(mlen, asize);
            /* Should I look at MAPPED_MBUFS??? */
        } else {
          nopages:
            len = MIN(mlen, asize);
        }
        m->m_len = 0;
        *mp = m;                /* XXXX */
        top->m_pkthdr.len += len;
        tpa = mtod(m, caddr_t);
        while (len) {
            rlen = MIN(len, tl);
            memcpy(tpa, tdata, rlen);
            asize -= rlen;
            len -= rlen;
            tpa += rlen;
            m->m_len += rlen;
            tdata += rlen;
            tl -= rlen;
            if (tl <= 0) {
                i++;
                if (i >= nvec) {
                    /* shouldn't come here! */
                    asize = 0;  /* so we make progress toward completion */
                    break;
                }
                tdata = dvec[i].iov_base;
                tl = dvec[i].iov_len;
            }
        }
        *mp = m;
        mp = &m->m_next;
        if (asize <= 0)
            break;
    }
    tm = top;

    tm->m_act = NULL;

    /* setup mbuf corresponding to destination address */
    um = m_get(M_DONTWAIT, MT_SONAME);
    if (!um) {
        if (top)
            m_freem(top);       /* free mbuf chain */
        sbunlock(&asocket->so_snd);
        splx(s);
        return 1;
    }
    memcpy(mtod(um, caddr_t), addr, sizeof(*addr));
    addr->sin_len = um->m_len = sizeof(*addr);
    /* note that udp_usrreq frees funny mbuf. We hold onto data, but mbuf
     * around it is gone. */
    /* haveGlock = ISAFS_GLOCK();
     * if (haveGlock) {
     *     AFS_GUNLOCK();
     * } */
    /* SOCKET_LOCK(asocket); */
    /* code = (*asocket->so_proto->pr_usrreq)(asocket, PRU_SEND, tm, um, 0); */
#if KNET_DEBUG
    if (before)
        Debugger("afs NetSend before");
#endif
    code =
        (*asocket->so_proto->pr_usrreqs->pru_send) (asocket, 0, tm,
                                                    (struct sockaddr *)
                                                    addr, um, &proc0);
    /* SOCKET_UNLOCK(asocket); */
    /* if (haveGlock) {
     *     AFS_GLOCK();
     * } */
    sbunlock(&asocket->so_snd);
    splx(s);
#if KNET_DEBUG
    if (code) {
        if (code == EINVAL)
            Debugger("afs NetSend busted");
        else
            printf("z");
    }
#endif
    return code;
}
#endif