Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / rx / AIX / rx_knet.c
1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10 #include <afsconfig.h>
11 #include "afs/param.h"
12
13
14 #ifdef AFS_AIX41_ENV
15 #include "rx/rx_kcommon.h"
16
/*
 * Copy of the UDP protocol switch entry.  rxk_init() fills it in from the
 * entry returned by pffindproto() before replacing pr_input/pr_fasttimo, and
 * shutdown_rxkernel() copies it back over that entry.
 */
static struct protosw parent_proto;	/* udp proto switch */
18
19 static void
20 rxk_input(struct mbuf *am, int hlen)
21 {
22 unsigned short *tsp;
23 int hdr;
24 struct udphdr *tu;
25 struct ip *ti;
26 struct udpiphdr *tvu;
27 int i;
28 char *phandle;
29 long code;
30 struct sockaddr_in taddr;
31 int tlen;
32 short port;
33 int data_len, comp_sum;
34 /* make sure we have base ip and udp headers in first mbuf */
35 if (M_HASCL(am) || am->m_len < 28) {
36 am = m_pullup(am, 28);
37 if (!am)
38 return;
39 }
40 hdr = (mtod(am, struct ip *))->ip_hl;
41 if (hdr > 5) {
42 /* pull up more, the IP hdr is bigger than usual */
43 if (am->m_len < (8 + (hdr << 2))) {
44 am = m_pullup(am, 8 + (hdr << 2));
45 if (!am)
46 return;
47 }
48 ti = mtod(am, struct ip *); /* recompute, since m_pullup allocates new mbuf */
49 tu = (struct udphdr *)(((char *)ti) + (hdr << 2)); /* skip ip hdr */
50 } else {
51 ti = mtod(am, struct ip *);
52 tu = (struct udphdr *)(((char *)ti) + 20); /* skip basic ip hdr */
53 }
54
55 /* now read the port out */
56 port = tu->uh_dport;
57 if (port) {
58 for (tsp = rxk_ports, i = 0; i < MAXRXPORTS; i++) {
59 if (*tsp++ == port) {
60 rxk_kpork(am);
61 return;
62 }
63 }
64 }
65 /* if we get here, try to deliver packet to udp */
66 if (parent_proto.pr_input)
67 udp_input(am, hlen);
68 }
69
70 /*
71 * the AIX version is complicated by the fact that the internet protocols
72 * are in a separate kernel extension, and they are unwilling to export their
73 * symbols to us. We can get there indirectly, however.
74 */
75 #include <net/netisr.h>
/* RXKluge queue: registered through add_input_type() in rxk_init() and
 * drained by rxk_isr(). */
static struct ifqueue rxk_q;	/* RXKluge queue */
/* Passed by address to find_input_type() in rxk_kpork(); nothing in this
 * file reads or writes its members. */
static struct arpcom rxk_bogosity;
78
79 /* rxk_kpork - send pkt over to netwerk kporc for processing */
80 rxk_kpork(struct mbuf *m)
81 {
82 find_input_type(0xdead, m, &rxk_bogosity, 0);
83 }
84
85 /*
86 * AIX 4.3.3 changed the type of the second argument to
87 * ip_stripoptions(). The ip_stripoptions() prototype is in
88 * <netinet/proto_inet.h>. This header file also acquired a guard
89 * macro, _PROTO_INET_H_, at the same time. So we test for the guard
90 * macro to see which type we need to use for the second argument to
91 * ip_stripoptions().
92 *
93 * This way we don't have to introduce a port just to compile AFS on AIX
94 * 4.3.3.
95 */
96
97 #if defined(_PROTO_INET_H_) /* AIX 4.3.3 and presumably later */
98 #define STRIP_ARG2_TYPE unsigned long
99 #else /* AIX 4.3.2 and earlier */
100 #define STRIP_ARG2_TYPE struct mbuf *
101 #endif
102
103 void
104 ip_stripoptions(struct mbuf *m, STRIP_ARG2_TYPE mopt)
105 {
106 struct ip *ip = mtod(m, struct ip *);
107 int i;
108 caddr_t opts;
109 int olen;
110
111 olen = (ip->ip_hl << 2) - sizeof(struct ip);
112 opts = (caddr_t) (ip + 1);
113 i = m->m_len - (sizeof(struct ip) + olen);
114 memcpy(opts, opts + olen, (unsigned)i);
115 m->m_len -= olen;
116 if (m->m_flags & M_PKTHDR)
117 m->m_pkthdr.len -= olen;
118 ip->ip_hl = sizeof(struct ip) >> 2;
119 }
120
121 /* rxk_RX_input - RX pkt input process */
122 rxk_RX_input(struct mbuf *am)
123 {
124 unsigned short *tsp;
125 int hdr;
126 struct udphdr *tu;
127 struct ip *ti;
128 struct udpiphdr *tvu;
129 int i;
130 struct rx_packet *phandle;
131 long code;
132 struct sockaddr_in taddr;
133 int tlen;
134 short port;
135 int data_len, comp_sum;
136
137 hdr = (ti = mtod(am, struct ip *))->ip_hl;
138 if (hdr > 5) {
139 ip_stripoptions(am, 0); /* get rid of anything we don't need */
140 }
141 tu = (struct udphdr *)(((char *)ti) + 20);
142 /*
143 * Make mbuf data length reflect UDP length.
144 * If not enough data to reflect UDP length, drop.
145 */
146 tvu = (struct udpiphdr *)ti;
147 tlen = ntohs((u_short) tvu->ui_ulen);
148 if ((int)ti->ip_len != tlen) {
149 if (tlen > (int)ti->ip_len) {
150 #ifdef RX_KERNEL_TRACE
151 int glockOwner = ISAFS_GLOCK();
152 if (!glockOwner)
153 AFS_GLOCK();
154 afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
155 __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_INT32,
156 tlen);
157 if (!glockOwner)
158 AFS_GUNLOCK();
159 #endif
160 m_free(am);
161 return;
162 }
163 m_adj(am, tlen - (int)ti->ip_len);
164 }
165 /* deliver packet to rx */
166 taddr.sin_family = AF_INET; /* compute source address */
167 taddr.sin_port = tu->uh_sport;
168 taddr.sin_addr.s_addr = ti->ip_src.s_addr;
169 /* handle the checksum. Note that this code damages the actual ip
170 * header (replacing it with the virtual one, which is the same size),
171 * so we must ensure we get everything out we need, first */
172 if (tu->uh_sum != 0) {
173 /* if the checksum is there, always check it. It's crazy not
174 * to, unless you can really be sure that your
175 * underlying network (and interfaces and drivers and
176 * DMA hardware, etc!) is error-free. First, fill
177 * in entire virtual ip header. */
178 #ifndef AFS_64BIT_KERNEL
179 tvu->ui_next = 0;
180 tvu->ui_prev = 0;
181 #endif
182 tvu->ui_x1 = 0;
183 tvu->ui_len = tvu->ui_ulen;
184 am->m_flags |= M_PKTHDR;
185 am->m_pkthdr.len = tlen;
186 #if !defined(AFS_AIX51_ENV) || !defined(AFS_64BIT_KERNEL)
187 if (in_cksum(am, sizeof(struct ip) + tlen)) {
188 /* checksum, including cksum field, doesn't come out 0, so
189 * this packet is bad */
190 #ifdef RX_KERNEL_TRACE
191 int glockOwner = ISAFS_GLOCK();
192 if (!glockOwner)
193 AFS_GLOCK();
194 afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
195 __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_INT32,
196 tlen);
197 if (!glockOwner)
198 AFS_GUNLOCK();
199 #endif
200 m_freem(am);
201 return;
202 }
203 #else
204 #ifdef notdef
205 { /* in_cksum() doesn't work correctly or the length is wrong? */
206 int cksum;
207 int glockOwner = ISAFS_GLOCK();
208 cksum = in_cksum(am, sizeof(struct ip) + tlen);
209 if (!glockOwner)
210 AFS_GLOCK();
211 afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
212 __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_INT32,
213 cksum);
214 if (!glockOwner)
215 AFS_GUNLOCK();
216 }
217 #endif
218 #endif
219 }
220
221 /*
222 * 28 is IP (20) + UDP (8) header. ulen includes
223 * udp header, and we *don't* tell RX about udp
224 * header either. So, we remove those 8 as well.
225 */
226 data_len = ntohs(tu->uh_ulen);
227 data_len -= 8;
228 if (!(*rxk_GetPacketProc) (&phandle, data_len)) {
229 if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
230 /* XXX should just increment counter here.. */
231 printf("rx: truncated UDP packet\n");
232 rxi_FreePacket(phandle);
233 } else
234 (*rxk_PacketArrivalProc) (phandle, &taddr, rx_socket, data_len);
235 } else
236 m_freem(am);
237 }
238
239 /* rxk_isr - RX Kluge Input Service Routine */
240 static
241 rxk_isr()
242 {
243 struct mbuf *m;
244 IFQ_LOCK_DECL(); /* silly macro has trailing ';'. Sigh. */
245 while (1) {
246 IF_DEQUEUE(&rxk_q, m);
247 if (!m)
248 return;
249 rxk_RX_input(m);
250 }
251 }
252
253 /*
254 * UDP fast timer to raise events for all but Solaris and NCR.
255 * Called about 5 times per second (at unknown priority?). Must go to
256 * splnet or obtain global lock before touching anything significant.
257 */
258 static void
259 rxk_fasttimo(void)
260 {
261 void (*tproc) (void);
262 struct clock temp;
263
264 /* do rx fasttimo processing here */
265 rxevent_RaiseEvents(&temp);
266 if (tproc = parent_proto.pr_fasttimo)
267 (*tproc) ();
268 }
269
270
271 void
272 rxk_init(void)
273 {
274 struct protosw *pr;
275 extern struct protosw *pffindproto();
276
277 if (!rxk_initDone && (pr = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM))) {
278 parent_proto = *pr;
279
280 pr->pr_input = rxk_input;
281 pr->pr_fasttimo = rxk_fasttimo;
282
283
284 /*
285 * don't bother with pr_drain and pr_ctlinput
286 * until we have something to do
287 */
288 rxk_q.ifq_maxlen = 128; /* obligatory XXX */
289 /* add pseudo pkt types as haque to get back onto net kproc */
290 if (!add_input_type
291 (0xdead, NET_KPROC, rxk_isr, &rxk_q, NETISR_MAX - 1))
292 rxk_initDone = 1;
293 }
294
295 if (!rxk_initDone) {
296 printf("\nAFS: no INTERNET protocol support found\n");
297 }
298 }
299
300
301
302 void
303 shutdown_rxkernel(void)
304 {
305 struct protosw *pr;
306 int i;
307 extern struct protosw *pffindproto();
308
309 if (rxk_initDone && (pr = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM))) {
310 *pr = parent_proto;
311
312 rxk_initDone = 0;
313 for (i = 0; i < MAXRXPORTS; i++) {
314 if (rxk_ports[i]) {
315 rxk_ports[i] = 0;
316 soclose((struct socket *)rxk_portRocks[i]);
317 rxk_portRocks[i] = NULL;
318 }
319 }
320 del_input_type(0xdead);
321 }
322 }
323
324
325 /* osi_NetSend - send asize bytes at adata from asocket to host at addr.
326 *
327 * Now, why do we allocate a new buffer when we could theoretically use the one
328 * pointed to by adata? Because PRU_SEND returns after queueing the message,
329 * not after sending it. If the sender changes the data after queueing it,
330 * we'd see the already-queued data change. One attempt to fix this without
331 * adding a copy would be to have this function wait until the datagram is
332 * sent; however this doesn't work well. In particular, if a host is down, and
333 * an ARP fails to that host, this packet will be queued until the ARP request
334 * comes back, which could be hours later. We can't block in this routine that
335 * long, since it prevents RPC timeouts from happening.
336 */
337 /* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
338 * and just queue those. XXX
339 */
340
341 int
342 osi_NetSend(osi_socket asocket, struct sockaddr_in *addr, struct iovec *dvec,
343 int nvec, afs_int32 asize, int istack)
344 {
345 struct mbuf *tm, *um;
346 afs_int32 code;
347 struct mbuf *top = 0;
348 struct mbuf *m, **mp;
349 int len, mlen;
350 char *tdata;
351 caddr_t tpa;
352 int i, tl, rlen;
353
354 AFS_STATCNT(osi_NetSend);
355 #ifndef AFS_AIX41_ENV
356 /*
357 * VRMIX has a version of sun's mclgetx() that works correctly with
358 * respect to mcopy(), so we can just dummy up the entire packet as
359 * an mbuf cluster, and pass it to the IP output routine (which will
360 * most likely have to frag it, but since mclgetx() has been fixed,
361 * will work ok). The only problem is that we have to wait until
362 * m_free() has been called on the cluster, to guarantee that we
363 * do not muck with it until it has gone out. We also must refrain
364 * from inadvertantly touching a piece of data that falls within the
365 * same cache line as any portion of the packet, if we have been lucky
366 * enough to be DMA-ing directly out from it.
367 * Certain IBM architects assure me that the rios is fast enough
368 * that the cost of the extra copy, as opposed to trying to
369 * DMA directly from the packet is barely worth my while,
370 * but I have a hard time accepting this.
371 *
372 * We can only use this code once we are passed in an indication of
373 * whether we are being called `process-synchronously' or not.
374 *
375 * of course, the packet must be pinned, which is currently true,
376 * but in future may not be.
377 */
378 #endif
379 mp = &top;
380 i = 0;
381 tdata = dvec[0].iov_base;
382 tl = dvec[0].iov_len;
383
384 while (1) {
385 if (!top) {
386 MGETHDR(m, M_DONTWAIT, MT_DATA);
387 mlen = MHLEN;
388 } else {
389 MGET(m, M_DONTWAIT, MT_DATA);
390 mlen = MLEN;
391 }
392 if (!m) {
393 /* can't get an mbuf, give up */
394 if (top)
395 m_freem(top); /* free mbuf list we're building */
396 return 1;
397 }
398 if (!top) {
399 m->m_flags |= M_PKTHDR; /* XXX - temp */
400 m->m_pkthdr.len = 0;
401 m->m_pkthdr.rcvif = NULL;
402 }
403
404 /*
405 * WARNING: the `4 * MLEN' is somewhat dubious. It is better than
406 * `NBPG', which may have no relation to `CLBYTES'. Also,
407 * `CLBYTES' may be so large that we never use clusters,
408 * resulting in far too many mbufs being used. It is often
409 * better to briefly use a cluster, even if we are only using a
410 * portion of it. Since we are on the xmit side, it shouldn't
411 * end up sitting on a queue for a potentially unbounded time
412 * (except perhaps if we are talking to ourself).
413 */
414 if (asize >= (MHLEN + 3 * MLEN)) {
415 MCLGET(m, M_DONTWAIT);
416 }
417 /* now compute usable size */
418 if (M_HASCL(m)) {
419 len = MIN(m->m_ext.ext_size, asize);
420 } else {
421 len = MIN(mlen, asize);
422 }
423
424 tpa = mtod(m, caddr_t);
425 *mp = m;
426 mp = &m->m_next;
427 m->m_len = 0;
428 while (len) {
429 rlen = MIN(len, tl);
430 memcpy(tpa, tdata, rlen);
431 asize -= rlen;
432 len -= rlen;
433 tpa += rlen;
434 m->m_len += rlen;
435 top->m_pkthdr.len += rlen;
436 tdata += rlen;
437 tl -= rlen;
438 if (tl <= 0) {
439 i++;
440 if (i > nvec) {
441 /* shouldn't come here! */
442 asize = 0; /* so we make progress toward completion */
443 break;
444 }
445 tdata = dvec[i].iov_base;
446 tl = dvec[i].iov_len;
447 }
448 }
449
450 if (asize <= 0)
451 break;
452 }
453 tm = top;
454
455 tm->m_act = NULL;
456
457 /* setup mbuf corresponding to destination address */
458 MGETHDR(um, M_DONTWAIT, MT_SONAME);
459 if (!um) {
460 if (top)
461 m_freem(top); /* free mbuf chain */
462 return 1;
463 }
464 memcpy(mtod(um, caddr_t), addr, sizeof(*addr));
465 um->m_len = sizeof(*addr);
466 um->m_pkthdr.len = sizeof(*addr);
467 um->m_flags |= M_PKTHDR;
468
469 SOCKET_LOCK(asocket);
470 code = (*asocket->so_proto->pr_usrreq) (asocket, PRU_SEND, tm, um, 0);
471 SOCKET_UNLOCK(asocket);
472 m_free(um);
473
474 return code;
475 }
476
477
478
479 #endif /* AFS_AIX41_ENV */