/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License. For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

#include <afsconfig.h>
#include "afs/param.h"


#include "rx/rx_kcommon.h"
#include "rx/rx_packet.h"
#include "h/tcp-param.h"
/* This must be loaded after proc.h to avoid macro collision with a variable */
#include "netinet/udp_var.h"




#ifdef RXK_LISTENER_ENV
/* osi_NetReceive
 * OS dependent part of kernel RX listener thread.
 *
 * Arguments:
 *  so      socket to receive on, typically rx_socket
 *  from    pointer to a sockaddr_in.
 *  iov     array of iovecs to fill in.
 *  iovcnt  how many iovecs there are.
 *  lengthp IN/OUT in: total space available in iovecs. out: size of read.
 *
 * Return
 *  0 if successful
 *  error code (such as EINTR) if not
 *
 * Environment
 *  Note that the maximum number of iovecs is 2 + RX_MAXWVECS. This is
 *  so we have a little space to look for packets larger than
 *  rx_maxReceiveSize.
 */
int rxk_lastSocketError = 0;
int rxk_nSocketErrors = 0;
int rxk_nSignalsCleared = 0;

int
osi_NetReceive(osi_socket so, struct sockaddr_in *addr, struct iovec *dvec,
               int nvecs, int *alength)
{
    struct uio tuio;
    int code;
    struct mbuf *maddr = NULL;
    struct sockaddr_in *taddr;
    struct iovec tmpvec[RX_MAXWVECS + 2];
#ifdef AFS_SGI65_ENV
    bhv_desc_t bhv;
    BHV_PDATA(&bhv) = (void *)so;
#endif

    memset(&tuio, 0, sizeof(tuio));
    memset(&tmpvec, 0, sizeof(tmpvec));

    tuio.uio_iov = tmpvec;
    tuio.uio_iovcnt = nvecs;
    tuio.uio_offset = 0;
    tuio.uio_segflg = AFS_UIOSYS;
    tuio.uio_fmode = 0;
    tuio.uio_resid = *alength;
    tuio.uio_pio = 0;
    tuio.uio_pbuf = 0;

    if (nvecs > RX_MAXWVECS + 2) {
        osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", nvecs);
    }
    memcpy(tmpvec, (char *)dvec, (RX_MAXWVECS + 1) * sizeof(struct iovec));
#ifdef AFS_SGI65_ENV
    code = soreceive(&bhv, &maddr, &tuio, NULL, NULL);
#else
    code = soreceive(so, &maddr, &tuio, NULL, NULL);
#endif

    if (code) {
#ifdef AFS_SGI65_ENV
        /* Clear the error before using the socket again. I've tried being nice
         * and blocking SIGKILL and SIGSTOP from the kernel, but they get
         * delivered anyway. So, time to be crude and just clear the signals
         * pending on this thread.
         */
        if (code == EINTR) {
            uthread_t *ut = curuthread;
            int s;
            s = ut_lock(ut);
            sigemptyset(&ut->ut_sig);
            ut->ut_cursig = 0;
            thread_interrupt_clear(UT_TO_KT(ut), 1);
            ut_unlock(ut, s);
            rxk_nSignalsCleared++;
        }
#endif
        /* Clear the error before using the socket again. */
        so->so_error = 0;
        rxk_lastSocketError = code;
        rxk_nSocketErrors++;
        if (maddr)
            m_freem(maddr);
    } else {
        *alength = *alength - tuio.uio_resid;
        if (maddr) {
            memcpy((char *)addr, (char *)mtod(maddr, struct sockaddr_in *),
                   sizeof(struct sockaddr_in));
            m_freem(maddr);
        } else {
            return -1;
        }
    }
    return code;
}
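
/*
 * Illustrative sketch only (not compiled in): this is roughly how the kernel
 * RX listener thread drives osi_NetReceive.  The iovec array and variable
 * names below are hypothetical; in the real listener they come from an
 * rx_packet's buffer vector.
 *
 *     struct sockaddr_in from;
 *     struct iovec iov[RX_MAXWVECS + 2];
 *     int nvecs;                  // number of iovecs actually filled in
 *     int length;                 // in: space available; out: bytes read
 *     ...
 *     int code = osi_NetReceive(rx_socket, &from, iov, nvecs, &length);
 *     if (code == 0) {
 *         // 'length' bytes arrived from 'from'; hand the packet to RX
 *     } else if (code == EINTR) {
 *         // pending signals were cleared above; the caller typically retries
 *     }
 */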
#else /* RXK_LISTENER_ENV */

static struct protosw parent_proto;  /* udp proto switch */

/*
 * RX input, fast timer and initialization routines.
 */

#ifdef AFS_SGI64_ENV
static void
rxk_input(struct mbuf *am, struct ifnet *aif, struct ipsec *spec)
#else
static void
rxk_input(struct mbuf *am, struct ifnet *aif)
#endif
{
    void (*tproc) ();
    unsigned short *tsp;
    int hdr;
    struct udphdr *tu;
    struct ip *ti;
    struct udpiphdr *tvu;
    int i;
    char *phandle;
    struct sockaddr_in taddr;
    int tlen;
    short port;
    int data_len;

    /* make sure we have base ip and udp headers in first mbuf */
    if (am->m_off > MMAXOFF || am->m_len < 28) {
        am = m_pullup(am, 28);
        if (!am)
            return;
    }

    hdr = (mtod(am, struct ip *))->ip_hl;
    if (hdr > 5) {
        /* pull up more, the IP hdr is bigger than usual */
        if (am->m_len < (8 + (hdr << 2))) {
            am = m_pullup(am, 8 + (hdr << 2));
            if (!am)
                return;
        }
        ti = mtod(am, struct ip *);  /* recompute, since m_pullup allocates new mbuf */
        tu = (struct udphdr *)(((char *)ti) + (hdr << 2));  /* skip ip hdr */
    } else {
        ti = mtod(am, struct ip *);
        tu = (struct udphdr *)(((char *)ti) + 20);  /* skip basic ip hdr */
    }
    /* now read the port out */
    port = tu->uh_dport;

    if (port) {
        for (tsp = rxk_ports, i = 0; i < MAXRXPORTS; i++) {
            if (*tsp++ == port) {
                /* checksum the packet */
                if (hdr > 5) {
                    ip_stripoptions(am, (struct mbuf *)0);  /* get rid of anything we don't need */
                    tu = (struct udphdr *)(((char *)ti) + 20);
                }
                /*
                 * Make mbuf data length reflect UDP length.
                 * If not enough data to reflect UDP length, drop.
                 */
                tvu = (struct udpiphdr *)ti;
                tlen = ntohs((u_short) tvu->ui_ulen);
                if ((int)ti->ip_len != tlen) {
                    if (tlen > (int)ti->ip_len) {
                        m_free(am);
                        return;
                    }
                    m_adj(am, tlen - (int)ti->ip_len);
                }
                /* deliver packet to rx */
                taddr.sin_family = AF_INET;  /* compute source address */
                taddr.sin_port = tu->uh_sport;
                taddr.sin_addr.s_addr = ti->ip_src.s_addr;
                /* handle the checksum. Note that this code damages the actual ip
                 * header (replacing it with the virtual one, which is the same size),
                 * so we must ensure we get everything out we need, first */
                if (tu->uh_sum != 0) {
                    /* if the checksum is there, always check it. It's crazy not
                     * to, unless you can really be sure that your
                     * underlying network (and interfaces and drivers and
                     * DMA hardware, etc!) is error-free. First, fill
                     * in entire virtual ip header. */
                    tvu->ui_next = 0;
                    tvu->ui_prev = 0;
                    tvu->ui_x1 = 0;
                    tvu->ui_len = tvu->ui_ulen;
                    tlen = ntohs((unsigned short)(tvu->ui_ulen));
                    if ((!(am->m_flags & M_CKSUMMED))
                        && in_cksum(am, sizeof(struct ip) + tlen)) {
                        /* checksum, including cksum field, doesn't come out 0, so
                         * this packet is bad */
                        m_freem(am);
                        return;
                    }
                }

                /*
                 * 28 is IP (20) + UDP (8) header. ulen includes
                 * udp header, and we *don't* tell RX about udp
                 * header either. So, we remove those 8 as well.
                 */
                data_len = ntohs(tu->uh_ulen);
                data_len -= 8;
                if (!(*rxk_GetPacketProc) (&phandle, data_len)) {
                    if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
                        /* XXX should just increment counter here.. */
                        printf("rx: truncated UDP packet\n");
                        rxi_FreePacket(phandle);
                    } else
                        (*rxk_PacketArrivalProc) (phandle, &taddr,
                                                  rxk_portRocks[i], data_len);
                } else
                    m_freem(am);
                return;
            }
        }
    }

    /* if we get here, try to deliver packet to udp */
    if (tproc = parent_proto.pr_input)
        (*tproc) (am, aif);
    return;
}

/*
 * UDP fast timer to raise events for all but Solaris and NCR.
 * Called about 5 times per second (at unknown priority?). Must go to
 * splnet or obtain global lock before touching anything significant.
 */
static void
rxk_fasttimo(void)
{
    int (*tproc) ();
    struct clock temp;

    /* do rx fasttimo processing here */
    rxevent_RaiseEvents(&temp);
    if (tproc = parent_proto.pr_fasttimo)
        (*tproc) ();
}


/* start intercepting basic calls */
void
rxk_init(void)
{
    struct protosw *tpro, *last;
    if (rxk_initDone)
        return;

    last = inetdomain.dom_protoswNPROTOSW;
    for (tpro = inetdomain.dom_protosw; tpro < last; tpro++) {
        if (tpro->pr_protocol == IPPROTO_UDP) {
            memcpy(&parent_proto, tpro, sizeof(parent_proto));
            tpro->pr_input = rxk_input;
            tpro->pr_fasttimo = rxk_fasttimo;
            rxk_initDone = 1;
            return;
        }
    }
    osi_Panic("inet:no udp");
}
#endif /* RXK_LISTENER_ENV */

/*
 * RX IP address routines.
 */

static afs_uint32 myNetAddrs[ADDRSPERSITE];
static int myNetMTUs[ADDRSPERSITE];
static int myNetFlags[ADDRSPERSITE];
static int numMyNetAddrs = 0;

/* This version doesn't even begin to handle iterative requests, but then
 * we don't yet use them anyway. Fix this when rxi_InitPeerParams is changed
 * to find a true maximum.
 */
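/* rxi_MatchIfnet is the hash_enum callback used by rxi_FindIfnet below.
 * arg1 points at the best match level found so far and arg2 at the best
 * in_ifaddr: 2 means the key matched only the interface's network,
 * 3 means it also matched the subnet, and 4 means it is the interface's
 * own address.  The callback always returns 0 so the enumeration visits
 * every bucket.
 */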
static int
rxi_MatchIfnet(struct hashbucket *h, caddr_t key, caddr_t arg1, caddr_t arg2)
{
    afs_uint32 ppaddr = *(afs_uint32 *) key;
    int match_value = *(int *)arg1;
    struct in_ifaddr *ifa = (struct in_ifaddr *)h;
    struct sockaddr_in *sin;

    if ((ppaddr & ifa->ia_netmask) == ifa->ia_net) {
        if ((ppaddr & ifa->ia_subnetmask) == ifa->ia_subnet) {
            sin = IA_SIN(ifa);
            if (sin->sin_addr.s_addr == ppaddr) {  /* ie, ME!!! */
                match_value = 4;
                *(struct in_ifaddr **)arg2 = ifa;
            }
            if (match_value < 3) {
                *(struct in_ifaddr **)arg2 = ifa;
                match_value = 3;
            }
        } else {
            if (match_value < 2) {
                *(struct in_ifaddr **)arg2 = ifa;
                match_value = 2;
            }
        }
    }
    *(int *)arg1 = match_value;
    return 0;
}


struct ifnet *
rxi_FindIfnet(afs_uint32 addr, afs_uint32 * maskp)
{
    afs_uint32 ppaddr;
    int match_value = 0;
    struct in_ifaddr *ifad;

    if (numMyNetAddrs == 0)
        (void)rxi_GetIFInfo();

    ppaddr = ntohl(addr);
    ifad = (struct in_ifaddr *)&hashinfo_inaddr;

    (void)hash_enum(&hashinfo_inaddr, rxi_MatchIfnet, HTF_INET,
                    (caddr_t) & ppaddr, (caddr_t) & match_value,
                    (caddr_t) & ifad);

    if (match_value) {
        if (maskp)
            *maskp = ifad->ia_subnetmask;
        return ifad->ia_ifp;
    } else
        return NULL;
}

static int
rxi_EnumGetIfInfo(struct hashbucket *h, caddr_t key, caddr_t arg1,
                  caddr_t arg2)
{
    int different = *(int *)arg1;
    int i = *(int *)arg2;
    struct in_ifaddr *iap = (struct in_ifaddr *)h;
    struct ifnet *ifnp;
    afs_uint32 ifinaddr;
    afs_uint32 rxmtu;

    if (i >= ADDRSPERSITE)
        return 0;

    ifnp = iap->ia_ifp;
    rxmtu = (ifnp->if_mtu - RX_IPUDP_SIZE);
    ifinaddr = ntohl(iap->ia_addr.sin_addr.s_addr);
    if (myNetAddrs[i] != ifinaddr) {
        myNetAddrs[i] = ifinaddr;
        myNetMTUs[i] = rxmtu;
        different++;
        *(int *)arg1 = different;
    }
    rxmtu = rxmtu * rxi_nRecvFrags + ((rxi_nRecvFrags - 1) * UDP_HDR_SIZE);
    if (!rx_IsLoopbackAddr(ifinaddr) && (rxmtu > rx_maxReceiveSize)) {
        rx_maxReceiveSize = MIN(RX_MAX_PACKET_SIZE, rxmtu);
        rx_maxReceiveSize = MIN(rx_maxReceiveSize, rx_maxReceiveSizeUser);
    }

    *(int *)arg2 = i + 1;
    return 0;
}

int
rxi_GetIFInfo()
{
    int i = 0;
    int different = 0;

    /* SGI 6.2 does not have a pointer from the ifnet to the list of
     * addresses (if_addrlist), so it's more efficient to run the
     * in_ifaddr list and use the back pointers to the ifnet structs.
     */
    (void)hash_enum(&hashinfo_inaddr, rxi_EnumGetIfInfo, HTF_INET, NULL,
                    (caddr_t) & different, (caddr_t) & i);

    rx_maxJumboRecvSize =
        RX_HEADER_SIZE + rxi_nDgramPackets * RX_JUMBOBUFFERSIZE +
        (rxi_nDgramPackets - 1) * RX_JUMBOHEADERSIZE;
    rx_maxJumboRecvSize = MAX(rx_maxJumboRecvSize, rx_maxReceiveSize);

    return different;
}
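
/*
 * Worked example of the interface sizing math in rxi_EnumGetIfInfo() above
 * (illustrative only, assuming a plain Ethernet interface): with
 * if_mtu = 1500, each fragment can carry
 * rxmtu = 1500 - RX_IPUDP_SIZE (the 28-byte IP + UDP header) = 1472 bytes,
 * so the candidate receive size considered is
 * 1472 * rxi_nRecvFrags + (rxi_nRecvFrags - 1) * UDP_HDR_SIZE, which is
 * then capped at RX_MAX_PACKET_SIZE and at rx_maxReceiveSizeUser.
 */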

/* osi_NetSend - from the now defunct afs_osinet.c */
#ifdef DEBUG
#undef DEBUG
#endif
#ifdef MP
#define _MP_NETLOCKS
#endif

#ifdef AFS_SGI65_ENV
int
osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
     osi_socket *asocket;
     struct iovec *dvec;
     int nvec;
     afs_int32 asize;
     struct sockaddr_in *addr;
     int istack;
{
    int code;
    struct iovec tvecs[RX_MAXWVECS + 1];
    struct iovec *iovp;
    struct uio tuio;
    struct mbuf *to;
    int i;
    bhv_desc_t bhv;

    memset(&tuio, 0, sizeof(tuio));
    memset(&tvecs, 0, sizeof(tvecs));

    if (nvec > RX_MAXWVECS + 1) {
        osi_Panic("osi_NetSend: %d: Too many iovecs.\n", nvec);
    }
    memcpy((char *)tvecs, (char *)dvec, nvec * sizeof(struct iovec));

    tuio.uio_iov = tvecs;
    tuio.uio_iovcnt = nvec;
    tuio.uio_segflg = UIO_SYSSPACE;
    tuio.uio_offset = 0;
    tuio.uio_sigpipe = 0;
    tuio.uio_pio = 0;
    tuio.uio_pbuf = 0;

    tuio.uio_resid = 0;
    for (i = 0, iovp = tvecs; i < nvec; i++, iovp++)
        tuio.uio_resid += iovp->iov_len;


    to = m_get(M_WAIT, MT_SONAME);
    to->m_len = sizeof(struct sockaddr_in);
    memcpy(mtod(to, caddr_t), (char *)addr, to->m_len);

    BHV_PDATA(&bhv) = (void *)asocket;
    code = sosend(&bhv, to, &tuio, 0, NULL);

    m_freem(to);
    return code;
}
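
/*
 * Illustrative sketch only (not compiled in): a caller of the AFS_SGI65_ENV
 * osi_NetSend above describes an already-built packet with an iovec array
 * and passes the destination separately.  The variable names below are
 * hypothetical:
 *
 *     struct sockaddr_in dest;        // filled in with the peer's address
 *     struct iovec iov[RX_MAXWVECS + 1];
 *     int nvec, length;               // iovec count and total byte count
 *     int istack = 0;
 *     ...
 *     code = osi_NetSend(rx_socket, &dest, iov, nvec, length, istack);
 *
 * Because sosend() copies the data out of the iovecs into mbufs before it
 * returns, the caller's buffers can be reused as soon as osi_NetSend comes
 * back.
 */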
#else /* AFS_SGI65_ENV */

int
dummy_sblock(struct sockbuf *a, int b, struct socket *c, int *d, int e)
{
    afs_warn
        ("sblock was called before it was installed. Install proper afsd.\n");
    return 0;
}

void
dummy_sbunlock(struct sockbuf *a, int b, struct socket *c, int d)
{
    afs_warn
        ("sbunlock was called before it was installed. Install proper afsd.\n");
}

int (*afs_sblockp) (struct sockbuf *, int, struct socket *, int *, int) =
    dummy_sblock;
void (*afs_sbunlockp) (struct sockbuf *, int, struct socket *, int) =
    dummy_sbunlock;
#define AFS_SBUNLOCK(SB, EV, SO, O) (*afs_sbunlockp)(SB, EV, SO, O)

/* osi_NetSend - send asize bytes from the iovecs in dvec over asocket to the
 * host at addr.
 *
 * Now, why do we allocate new buffers when we could theoretically use the
 * ones dvec points to? Because PRU_SEND returns after queueing the message,
 * not after sending it. If the sender changes the data after queueing it,
 * we'd see the already-queued data change. One attempt to fix this without
 * adding a copy would be to have this function wait until the datagram is
 * sent; however this doesn't work well. In particular, if a host is down, and
 * an ARP to that host fails, this packet will be queued until the ARP reply
 * comes back, which could be hours later. We can't block in this routine that
 * long, since doing so prevents RPC timeouts from happening.
 */
/* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
 * and just queue those. XXX
 */
int
osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
     struct socket *asocket;
     struct iovec *dvec;
     int nvec;
     afs_int32 asize;
     struct sockaddr_in *addr;
     int istack;
{
    struct mbuf *tm, *um;
    afs_int32 code;
    int s;
    struct mbuf *top = 0;
    struct mbuf *m, **mp;
    int len;
    char *tdata;
    caddr_t tpa;
    int i, tl, rlen;

    NETSPL_DECL(s1)
    AFS_STATCNT(osi_NetSend);

    (*afs_sblockp) (&asocket->so_snd, NETEVENT_SODOWN, asocket, &s1, istack);

    s = splnet();
    mp = &top;
    i = 0;
    tdata = dvec[i].iov_base;
    tl = dvec[i].iov_len;
    while (1) {
        if ((m = m_vget(M_DONTWAIT, MIN(asize, VCL_MAX), MT_DATA)) == NULL) {
            if (top)
                m_freem(top);
            splx(s);
            AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
            return 1;
        }
        len = MIN(m->m_len, asize);
        m->m_len = 0;
        tpa = mtod(m, caddr_t);
        while (len) {
            rlen = MIN(len, tl);
            memcpy(tpa, tdata, rlen);
            asize -= rlen;
            len -= rlen;
            tpa += rlen;
            m->m_len += rlen;
            tdata += rlen;
            tl -= rlen;
            if (tl <= 0) {
                i++;
                if (i > nvec) {
                    /* shouldn't come here! */
                    asize = 0;  /* so we make progress toward completion */
                    break;
                }
                tdata = dvec[i].iov_base;
                tl = dvec[i].iov_len;
            }
        }
        *mp = m;
        mp = &m->m_next;
        if (asize <= 0)
            break;
    }
    tm = top;

    tm->m_act = NULL;

    /* setup mbuf corresponding to destination address */
    um = m_get(M_DONTWAIT, MT_SONAME);
    if (!um) {
        if (top)
            m_freem(top);  /* free mbuf chain */
        /* if this were vfs40, we'd do sbunlock(asocket, &asocket->so_snd), but
         * we don't do the locking at all for vfs40 systems */
        splx(s);
        AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
        return 1;
    }
    memcpy(mtod(um, caddr_t), addr, sizeof(*addr));
    um->m_len = sizeof(*addr);
    /* note that udp_usrreq frees funny mbuf. We hold onto data, but mbuf
     * around it is gone. we free address ourselves. */
    code = (*asocket->so_proto->pr_usrreq) (asocket, PRU_SEND, tm, um, 0);
    splx(s);
    m_free(um);
    AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);

    return code;
}
#endif /* AFS_SGI65_ENV */