src/rx/AIX/rx_knet.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #include "afs/param.h"
  12
  13
  14 #ifdef AFS_AIX41_ENV
  15 #include "rx/rx_kcommon.h"
  16
  17 static struct protosw parent_proto;     /* udp proto switch */
  18
  19 static void
  20 rxk_input(struct mbuf *am, int hlen)
  21 {
  22     unsigned short *tsp;
  23     int hdr;
  24     struct udphdr *tu;
  25     struct ip *ti;
  26     struct udpiphdr *tvu;
  27     int i;
  28     char *phandle;
  29     long code;
  30     struct sockaddr_in taddr;
  31     int tlen;
  32     short port;
  33     int data_len, comp_sum;
  34     /* make sure we have base ip and udp headers in first mbuf */
  35     if (M_HASCL(am) || am->m_len < 28) {
  36         am = m_pullup(am, 28);
  37         if (!am)
  38             return;
  39     }
  40     hdr = (mtod(am, struct ip *))->ip_hl;
  41     if (hdr > 5) {
  42         /* pull up more, the IP hdr is bigger than usual */
  43         if (am->m_len < (8 + (hdr << 2))) {
  44             am = m_pullup(am, 8 + (hdr << 2));
  45             if (!am)
  46                 return;
  47         }
  48         ti = mtod(am, struct ip *);     /* recompute, since m_pullup allocates new mbuf */
  49         tu = (struct udphdr *)(((char *)ti) + (hdr << 2));      /* skip ip hdr */
  50     } else {
  51         ti = mtod(am, struct ip *);
  52         tu = (struct udphdr *)(((char *)ti) + 20);      /* skip basic ip hdr */
  53     }
  54
  55     /* now read the port out */
  56     port = tu->uh_dport;
  57     if (port) {
  58         for (tsp = rxk_ports, i = 0; i < MAXRXPORTS; i++) {
  59             if (*tsp++ == port) {
  60                 rxk_kpork(am);
  61                 return;
  62             }
  63         }
  64     }
  65     /* if we get here, try to deliver packet to udp */
  66     if (parent_proto.pr_input)
  67         udp_input(am, hlen);
  68 }
  69
  70 /*
  71  * the AIX version is complicated by the fact that the internet protocols
  72  * are in a separate kernel extension, and they are unwilling to export their
  73  * symbols to us.  We can get there indirectly, however.
  74  */
  75 #include <net/netisr.h>
  76 static struct ifqueue rxk_q;    /* RXKluge queue        */
  77 static struct arpcom rxk_bogosity;
  78
  79 /* rxk_kpork -  send pkt over to netwerk kporc for processing */
  80 rxk_kpork(struct mbuf *m)
  81 {
  82     find_input_type(0xdead, m, &rxk_bogosity, 0);
  83 }
  84
  85 /*
  86  * AIX 4.3.3 changed the type of the second argument to
  87  * ip_stripoptions().  The ip_stripoptions() prototype is in
  88  * <netinet/proto_inet.h>.  This header file also acquired a guard
  89  * macro, _PROTO_INET_H_, at the same time.  So we test for the guard
  90  * macro to see which type we need to use for the second argument to
  91  * ip_stripoptions().
  92  *
  93  * This way we don't have to introduce a port just to compile AFS on AIX
  94  * 4.3.3.
  95  */
  96
  97 #if defined(_PROTO_INET_H_)     /* AIX 4.3.3 and presumably later */
  98 #define STRIP_ARG2_TYPE unsigned long
  99 #else /* AIX 4.3.2 and earlier */
 100 #define STRIP_ARG2_TYPE struct mbuf *
 101 #endif
 102
 103 void
 104 ip_stripoptions(struct mbuf *m, STRIP_ARG2_TYPE mopt)
 105 {
 106     struct ip *ip = mtod(m, struct ip *);
 107     int i;
 108     caddr_t opts;
 109     int olen;
 110
 111     olen = (ip->ip_hl << 2) - sizeof(struct ip);
 112     opts = (caddr_t) (ip + 1);
 113     i = m->m_len - (sizeof(struct ip) + olen);
 114     memcpy(opts, opts + olen, (unsigned)i);
 115     m->m_len -= olen;
 116     if (m->m_flags & M_PKTHDR)
 117         m->m_pkthdr.len -= olen;
 118     ip->ip_hl = sizeof(struct ip) >> 2;
 119 }
 120
 121 /* rxk_RX_input -       RX pkt input process */
 122 rxk_RX_input(struct mbuf *am)
 123 {
 124     unsigned short *tsp;
 125     int hdr;
 126     struct udphdr *tu;
 127     struct ip *ti;
 128     struct udpiphdr *tvu;
 129     int i;
 130     struct rx_packet *phandle;
 131     long code;
 132     struct sockaddr_in taddr;
 133     int tlen;
 134     short port;
 135     int data_len, comp_sum;
 136
 137     hdr = (ti = mtod(am, struct ip *))->ip_hl;
 138     if (hdr > 5) {
 139         ip_stripoptions(am, 0); /* get rid of anything we don't need */
 140     }
 141     tu = (struct udphdr *)(((char *)ti) + 20);
 142     /*
 143      * Make mbuf data length reflect UDP length.
 144      * If not enough data to reflect UDP length, drop.
 145      */
 146     tvu = (struct udpiphdr *)ti;
 147     tlen = ntohs((u_short) tvu->ui_ulen);
 148     if ((int)ti->ip_len != tlen) {
 149         if (tlen > (int)ti->ip_len) {
 150 #ifdef RX_KERNEL_TRACE
 151             int glockOwner = ISAFS_GLOCK();
 152             if (!glockOwner)
 153                 AFS_GLOCK();
 154             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
 155                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_INT32,
 156                        tlen);
 157             if (!glockOwner)
 158                 AFS_GUNLOCK();
 159 #endif
 160             m_free(am);
 161             return;
 162         }
 163         m_adj(am, tlen - (int)ti->ip_len);
 164     }
 165     /* deliver packet to rx */
 166     taddr.sin_family = AF_INET; /* compute source address */
 167     taddr.sin_port = tu->uh_sport;
 168     taddr.sin_addr.s_addr = ti->ip_src.s_addr;
 169     /* handle the checksum.  Note that this code damages the actual ip
 170      * header (replacing it with the virtual one, which is the same size),
 171      * so we must ensure we get everything out we need, first */
 172     if (tu->uh_sum != 0) {
 173         /* if the checksum is there, always check it. It's crazy not
 174          * to, unless you can really be sure that your
 175          * underlying network (and interfaces and drivers and
 176          * DMA hardware, etc!) is error-free. First, fill
 177          * in entire virtual ip header. */
 178 #ifndef AFS_64BIT_KERNEL
 179         tvu->ui_next = 0;
 180         tvu->ui_prev = 0;
 181 #endif
 182         tvu->ui_x1 = 0;
 183         tvu->ui_len = tvu->ui_ulen;
 184         am->m_flags |= M_PKTHDR;
 185         am->m_pkthdr.len = tlen;
 186 #if !defined(AFS_AIX51_ENV) || !defined(AFS_64BIT_KERNEL)
 187         if (in_cksum(am, sizeof(struct ip) + tlen)) {
 188             /* checksum, including cksum field, doesn't come out 0, so
 189              * this packet is bad */
 190 #ifdef RX_KERNEL_TRACE
 191             int glockOwner = ISAFS_GLOCK();
 192             if (!glockOwner)
 193                 AFS_GLOCK();
 194             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
 195                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_INT32,
 196                        tlen);
 197             if (!glockOwner)
 198                 AFS_GUNLOCK();
 199 #endif
 200             m_freem(am);
 201             return;
 202         }
 203 #else
 204 #ifdef notdef
 205         {                       /* in_cksum() doesn't work correctly or the length is wrong? */
 206             int cksum;
 207             int glockOwner = ISAFS_GLOCK();
 208             cksum = in_cksum(am, sizeof(struct ip) + tlen);
 209             if (!glockOwner)
 210                 AFS_GLOCK();
 211             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
 212                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_INT32,
 213                        cksum);
 214             if (!glockOwner)
 215                 AFS_GUNLOCK();
 216         }
 217 #endif
 218 #endif
 219     }
 220
 221     /*
 222      * 28 is IP (20) + UDP (8) header.  ulen includes
 223      * udp header, and we *don't* tell RX about udp
 224      * header either.  So, we remove those 8 as well.
 225      */
 226     data_len = ntohs(tu->uh_ulen);
 227     data_len -= 8;
 228     if (!(*rxk_GetPacketProc) (&phandle, data_len)) {
 229         if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
 230             /* XXX should just increment counter here.. */
 231             printf("rx: truncated UDP packet\n");
 232             rxi_FreePacket(phandle);
 233         } else
 234             (*rxk_PacketArrivalProc) (phandle, &taddr, rx_socket, data_len);
 235     } else
 236         m_freem(am);
 237 }
 238
 239 /* rxk_isr - RX Kluge Input Service Routine */
 240 static
 241 rxk_isr()
 242 {
 243     struct mbuf *m;
 244     IFQ_LOCK_DECL();            /* silly macro has trailing ';'.  Sigh. */
 245     while (1) {
 246         IF_DEQUEUE(&rxk_q, m);
 247         if (!m)
 248             return;
 249         rxk_RX_input(m);
 250     }
 251 }
 252
 253 /*
 254  * UDP fast timer to raise events for all but Solaris and NCR.
 255  * Called about 5 times per second (at unknown priority?).  Must go to
 256  * splnet or obtain global lock before touching anything significant.
 257  */
 258 static void
 259 rxk_fasttimo(void)
 260 {
 261     void (*tproc) (void);
 262     struct clock temp;
 263
 264     /* do rx fasttimo processing here */
 265     rxevent_RaiseEvents(&temp);
 266     if (tproc = parent_proto.pr_fasttimo)
 267         (*tproc) ();
 268 }
 269
 270
 271 void
 272 rxk_init(void)
 273 {
 274     struct protosw *pr;
 275     extern struct protosw *pffindproto();
 276
 277     if (!rxk_initDone && (pr = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM))) {
 278         parent_proto = *pr;
 279
 280         pr->pr_input = rxk_input;
 281         pr->pr_fasttimo = rxk_fasttimo;
 282
 283
 284         /*
 285          * don't bother with pr_drain and pr_ctlinput
 286          * until we have something to do
 287          */
 288         rxk_q.ifq_maxlen = 128; /* obligatory XXX       */
 289         /* add pseudo pkt types as haque to get back onto net kproc */
 290         if (!add_input_type
 291             (0xdead, NET_KPROC, rxk_isr, &rxk_q, NETISR_MAX - 1))
 292             rxk_initDone = 1;
 293     }
 294
 295     if (!rxk_initDone) {
 296         printf("\nAFS: no INTERNET protocol support found\n");
 297     }
 298 }
 299
 300
 301
 302 void
 303 shutdown_rxkernel(void)
 304 {
 305     struct protosw *pr;
 306     int i;
 307     extern struct protosw *pffindproto();
 308
 309     if (rxk_initDone && (pr = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM))) {
 310         *pr = parent_proto;
 311
 312         rxk_initDone = 0;
 313         for (i = 0; i < MAXRXPORTS; i++) {
 314             if (rxk_ports[i]) {
 315                 rxk_ports[i] = 0;
 316                 soclose((struct socket *)rxk_portRocks[i]);
 317                 rxk_portRocks[i] = NULL;
 318             }
 319         }
 320         del_input_type(0xdead);
 321     }
 322 }
 323
 324
 325 /* osi_NetSend - send asize bytes at adata from asocket to host at addr.
 326  *
 327  * Now, why do we allocate a new buffer when we could theoretically use the one
 328  * pointed to by adata?  Because PRU_SEND returns after queueing the message,
 329  * not after sending it.  If the sender changes the data after queueing it,
 330  * we'd see the already-queued data change.  One attempt to fix this without
 331  * adding a copy would be to have this function wait until the datagram is
 332  * sent; however this doesn't work well.  In particular, if a host is down, and
 333  * an ARP fails to that host, this packet will be queued until the ARP request
 334  * comes back, which could be hours later.  We can't block in this routine that
 335  * long, since it prevents RPC timeouts from happening.
 336  */
 337 /* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
 338  * and just queue those.  XXX
 339  */
 340
 341 int
 342 osi_NetSend(osi_socket asocket, struct sockaddr_in *addr, struct iovec *dvec,
 343             int nvec, afs_int32 asize, int istack)
 344 {
 345     struct mbuf *tm, *um;
 346     afs_int32 code;
 347     struct mbuf *top = 0;
 348     struct mbuf *m, **mp;
 349     int len, mlen;
 350     char *tdata;
 351     caddr_t tpa;
 352     int i, tl, rlen;
 353
 354     AFS_STATCNT(osi_NetSend);
 355 #ifndef AFS_AIX41_ENV
 356     /*
 357      * VRMIX has a version of sun's mclgetx() that works correctly with
 358      * respect to mcopy(), so we can just dummy up the entire packet as
 359      * an mbuf cluster, and pass it to the IP output routine (which will
 360      * most likely have to frag it, but since mclgetx() has been fixed,
 361      * will work ok).  The only problem is that we have to wait until
 362      * m_free() has been called on the cluster, to guarantee that we
 363      * do not muck with it until it has gone out.  We also must refrain
 364      * from inadvertantly touching a piece of data that falls within the
 365      * same cache line as any portion of the packet, if we have been lucky
 366      * enough to be DMA-ing directly out from it.
 367      * Certain IBM architects assure me that the rios is fast enough
 368      * that the cost of the extra copy, as opposed to trying to
 369      * DMA directly from the packet is barely worth my while,
 370      * but I have a hard time accepting this.
 371      *
 372      * We can only use this code once we are passed in an indication of
 373      * whether we are being called `process-synchronously' or not.
 374      *
 375      * of course, the packet must be pinned, which is currently true,
 376      * but in future may not be.
 377      */
 378 #endif
 379     mp = &top;
 380     i = 0;
 381     tdata = dvec[0].iov_base;
 382     tl = dvec[0].iov_len;
 383
 384     while (1) {
 385         if (!top) {
 386             MGETHDR(m, M_DONTWAIT, MT_DATA);
 387             mlen = MHLEN;
 388         } else {
 389             MGET(m, M_DONTWAIT, MT_DATA);
 390             mlen = MLEN;
 391         }
 392         if (!m) {
 393             /* can't get an mbuf, give up */
 394             if (top)
 395                 m_freem(top);   /* free mbuf list we're building */
 396             return 1;
 397         }
 398         if (!top) {
 399             m->m_flags |= M_PKTHDR;     /* XXX - temp */
 400             m->m_pkthdr.len = 0;
 401             m->m_pkthdr.rcvif = NULL;
 402         }
 403
 404         /*
 405          * WARNING: the `4 * MLEN' is somewhat dubious.  It is better than
 406          * `NBPG', which may have no relation to `CLBYTES'.  Also,
 407          * `CLBYTES' may be so large that we never use clusters,
 408          * resulting in far too many mbufs being used.  It is often
 409          * better to briefly use a cluster, even if we are only using a
 410          * portion of it.  Since we are on the xmit side, it shouldn't
 411          * end up sitting on a queue for a potentially unbounded time
 412          * (except perhaps if we are talking to ourself).
 413          */
 414         if (asize >= (MHLEN + 3 * MLEN)) {
 415             MCLGET(m, M_DONTWAIT);
 416         }
 417         /* now compute usable size */
 418         if (M_HASCL(m)) {
 419             len = MIN(m->m_ext.ext_size, asize);
 420         } else {
 421             len = MIN(mlen, asize);
 422         }
 423
 424         tpa = mtod(m, caddr_t);
 425         *mp = m;
 426         mp = &m->m_next;
 427         m->m_len = 0;
 428         while (len) {
 429             rlen = MIN(len, tl);
 430             memcpy(tpa, tdata, rlen);
 431             asize -= rlen;
 432             len -= rlen;
 433             tpa += rlen;
 434             m->m_len += rlen;
 435             top->m_pkthdr.len += rlen;
 436             tdata += rlen;
 437             tl -= rlen;
 438             if (tl <= 0) {
 439                 i++;
 440                 if (i > nvec) {
 441                     /* shouldn't come here! */
 442                     asize = 0;  /* so we make progress toward completion */
 443                     break;
 444                 }
 445                 tdata = dvec[i].iov_base;
 446                 tl = dvec[i].iov_len;
 447             }
 448         }
 449
 450         if (asize <= 0)
 451             break;
 452     }
 453     tm = top;
 454
 455     tm->m_act = NULL;
 456
 457     /* setup mbuf corresponding to destination address */
 458     MGETHDR(um, M_DONTWAIT, MT_SONAME);
 459     if (!um) {
 460         if (top)
 461             m_freem(top);       /* free mbuf chain */
 462         return 1;
 463     }
 464     memcpy(mtod(um, caddr_t), addr, sizeof(*addr));
 465     um->m_len = sizeof(*addr);
 466     um->m_pkthdr.len = sizeof(*addr);
 467     um->m_flags |= M_PKTHDR;
 468
 469     SOCKET_LOCK(asocket);
 470     code = (*asocket->so_proto->pr_usrreq) (asocket, PRU_SEND, tm, um, 0);
 471     SOCKET_UNLOCK(asocket);
 472     m_free(um);
 473
 474     return code;
 475 }
 476
 477
 478
 479 #endif /* AFS_AIX41_ENV */