1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10 #include <afsconfig.h>
11 #include <afs/param.h>
12
13 #ifdef KERNEL
14 # if defined(UKERNEL)
15 # include "afs/sysincludes.h"
16 # include "afsincludes.h"
17 # include "rx_kcommon.h"
18 # else /* defined(UKERNEL) */
19 # ifdef RX_KERNEL_TRACE
20 # include "rx_kcommon.h"
21 # endif
22 # include "h/types.h"
23 # ifndef AFS_LINUX20_ENV
24 # include "h/systm.h"
25 # endif
26 # if defined(AFS_SGI_ENV) || defined(AFS_HPUX110_ENV) || defined(AFS_NBSD50_ENV)
27 # include "afs/sysincludes.h"
28 # endif
29 # if defined(AFS_OBSD_ENV)
30 # include "h/proc.h"
31 # endif
32 # include "h/socket.h"
33 # if !defined(AFS_SUN5_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_HPUX110_ENV)
34 # if !defined(AFS_OSF_ENV) && !defined(AFS_AIX41_ENV)
35 # include "sys/mount.h" /* it gets pulled in by something later anyway */
36 # endif
37 # include "h/mbuf.h"
38 # endif
39 # include "netinet/in.h"
40 # include "afs/afs_osi.h"
41 # include "rx_kmutex.h"
42 # endif /* defined(UKERNEL) */
43 #else /* KERNEL */
44 # include <roken.h>
45 # include <assert.h>
46 # include <afs/opr.h>
47 # if defined(AFS_NT40_ENV)
48 # ifndef EWOULDBLOCK
49 # define EWOULDBLOCK WSAEWOULDBLOCK
50 # endif
51 # include "rx_user.h"
52 # include "rx_xmit_nt.h"
53 # endif
54 # include <lwp.h>
55 #endif /* KERNEL */
56
57 #ifdef AFS_SUN5_ENV
58 # include <sys/sysmacros.h>
59 #endif
60
61 #include <opr/queue.h>
62
63 #include "rx.h"
64 #include "rx_clock.h"
65 #include "rx_packet.h"
66 #include "rx_atomic.h"
67 #include "rx_globals.h"
68 #include "rx_internal.h"
69 #include "rx_stats.h"
70
71 #include "rx_peer.h"
72 #include "rx_conn.h"
73 #include "rx_call.h"
74
75 /*!
76 * \brief structure used to keep track of allocated packets
77 */
78 struct rx_mallocedPacket {
79 struct opr_queue entry; /*!< chained using opr_queue */
80 struct rx_packet *addr; /*!< address of the first element */
81 afs_uint32 size; /*!< array size in bytes */
82 };
83
84 #ifdef RX_LOCKS_DB
85 /* rxdb_fileID is used to identify the lock location, along with line#. */
86 static int rxdb_fileID = RXDB_FILE_RX_PACKET;
87 #endif /* RX_LOCKS_DB */
88 static struct rx_packet *rx_mallocedP = 0;
89 #ifdef RXDEBUG_PACKET
90 static afs_uint32 rx_packet_id = 0;
91 #endif
92
93 extern char cml_version_number[];
94
95 static int AllocPacketBufs(int class, int num_pkts, struct opr_queue *q);
96
97 static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
98 afs_uint32 ahost, short aport,
99 afs_int32 istack);
100 static struct rx_packet *rxi_AllocPacketNoLock(int class);
101
102 #ifndef KERNEL
103 static void rxi_MorePacketsNoLock(int apackets);
104 #endif
105
106 #ifdef RX_ENABLE_TSFPQ
107 static int rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first,
108 int flush_global);
109 static void rxi_AdjustLocalPacketsTSFPQ(int num_keep_local,
110 int allow_overcommit);
111 #else
112 static void rxi_FreePacketNoLock(struct rx_packet *p);
113 static int rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first);
114 static int rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first,
115 struct opr_queue * q);
116 #endif
117
118 extern struct opr_queue rx_idleServerQueue;
119
120 /* some rules about packets:
121 * 1. When a packet is allocated, the final iov_buf contains room for
122 * a security trailer, but iov_len masks that fact. If the security
123 * package wants to add the trailer, it may do so, and then extend
124 * iov_len appropriately. For this reason, packet's niovecs and
125 * iov_len fields should be accurate before calling PreparePacket.
126 */
127
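/*
 * Illustrative sketch (not part of the build; names are hypothetical): how a
 * security package might use the room reserved after the final iovec, per the
 * rule above, to append a trailer of "trailerLen" bytes.  The assumption that
 * enough reserved space remains is the caller's to verify; real packages such
 * as rxkad keep their own bookkeeping.
 */
#if 0
static void
example_AppendSecurityTrailer(struct rx_packet *pkt, size_t trailerLen)
{
    struct iovec *last = &pkt->wirevec[pkt->niovecs - 1];

    /* zero the trailer area, then expose it by growing iov_len */
    memset((char *)last->iov_base + last->iov_len, 0, trailerLen);
    last->iov_len += trailerLen;
    /* whether pkt->length is also adjusted depends on the security layer */
}
#endif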
128 /* Preconditions:
129 * all packet buffers (iov_base) are integral multiples of
130 * the word size.
131 * offset is an integral multiple of the word size.
132 */
133 afs_int32
134 rx_SlowGetInt32(struct rx_packet *packet, size_t offset)
135 {
136 unsigned int i;
137 size_t l;
138 for (l = 0, i = 1; i < packet->niovecs; i++) {
139 if (l + packet->wirevec[i].iov_len > offset) {
140 return
141 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
142 (offset - l)));
143 }
144 l += packet->wirevec[i].iov_len;
145 }
146
147 return 0;
148 }
149
150 /* Preconditions:
151 * all packet buffers (iov_base) are integral multiples of the word size.
152 * offset is an integral multiple of the word size.
153 */
154 afs_int32
155 rx_SlowPutInt32(struct rx_packet * packet, size_t offset, afs_int32 data)
156 {
157 unsigned int i;
158 size_t l;
159 for (l = 0, i = 1; i < packet->niovecs; i++) {
160 if (l + packet->wirevec[i].iov_len > offset) {
161 *((afs_int32 *) ((char *)(packet->wirevec[i].iov_base) +
162 (offset - l))) = data;
163 return 0;
164 }
165 l += packet->wirevec[i].iov_len;
166 }
167
168 return 0;
169 }
170
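/*
 * A minimal sketch (compiled out) of the slow 32-bit accessors above: store a
 * word-aligned value in the data area and read it back.  The helper name is
 * hypothetical; callers normally go through rx_PutInt32-style wrappers (used
 * further down in this file) and only take the slow path when the offset
 * falls outside the first data iovec.
 */
#if 0
static void
example_Int32RoundTrip(struct rx_packet *pkt)
{
    afs_int32 value = htonl(42);

    rx_SlowPutInt32(pkt, 4, value);     /* offset must be word aligned */
    osi_Assert(rx_SlowGetInt32(pkt, 4) == value);
}
#endif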
171 /* Preconditions:
172 * all packet buffers (iov_base) are integral multiples of the
173 * word size.
174 * offset is an integral multiple of the word size.
175 * Packet Invariants:
176 * all buffers are contiguously arrayed in the iovec from 0..niovecs-1
177 */
178 afs_int32
179 rx_SlowReadPacket(struct rx_packet * packet, unsigned int offset, int resid,
180 char *out)
181 {
182 unsigned int i, j, l, r;
183 for (l = 0, i = 1; i < packet->niovecs; i++) {
184 if (l + packet->wirevec[i].iov_len > offset) {
185 break;
186 }
187 l += packet->wirevec[i].iov_len;
188 }
189
190 /* i is the iovec which contains the first little bit of data in which we
191 * are interested. l is the total length of everything prior to this iovec.
192 * j is the number of bytes we can safely copy out of this iovec.
193 * offset only applies to the first iovec.
194 */
195 r = resid;
196 while ((r > 0) && (i < packet->niovecs)) {
197 j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
198 memcpy(out, (char *)(packet->wirevec[i].iov_base) + (offset - l), j);
199 r -= j;
200 out += j;
201 l += packet->wirevec[i].iov_len;
202 offset = l;
203 i++;
204 }
205
206 return (r ? (resid - r) : resid);
207 }
208
209
210 /* Preconditions:
211 * all packet buffers (iov_base) are integral multiples of the
212 * word size.
213 * offset is an integral multiple of the word size.
214 */
215 afs_int32
216 rx_SlowWritePacket(struct rx_packet * packet, int offset, int resid, char *in)
217 {
218 unsigned int i, j, l, o, r;
219 char *b;
220
221 for (l = 0, i = 1, o = offset; i < packet->niovecs; i++) {
222 if (l + packet->wirevec[i].iov_len > o) {
223 break;
224 }
225 l += packet->wirevec[i].iov_len;
226 }
227
228 /* i is the iovec which contains the first little bit of data in which we
229 * are interested. l is the total length of everything prior to this iovec.
230  * j is the number of bytes we can safely copy into this iovec.
231 * offset only applies to the first iovec.
232 */
233 r = resid;
234 while ((r > 0) && (i <= RX_MAXWVECS)) {
235 if (i >= packet->niovecs)
236 if (rxi_AllocDataBuf(packet, r, RX_PACKET_CLASS_SEND_CBUF) > 0) /* ++niovecs as a side-effect */
237 break;
238
239 b = (char *)(packet->wirevec[i].iov_base) + (offset - l);
240 j = MIN(r, packet->wirevec[i].iov_len - (offset - l));
241 memcpy(b, in, j);
242 r -= j;
243 in += j;
244 l += packet->wirevec[i].iov_len;
245 offset = l;
246 i++;
247 }
248
249 return (r ? (resid - r) : resid);
250 }
251
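/*
 * A minimal sketch (compiled out) of the scatter/gather copy routines above:
 * write a small structure into a packet's data area and read it back, letting
 * rx_SlowWritePacket/rx_SlowReadPacket walk the iovec chain.  A full copy
 * returns the requested byte count, so anything smaller signals a short
 * packet.  The helper name is hypothetical; callers normally use the
 * rx_packetread/rx_packetwrite wrappers seen later in this file.
 */
#if 0
static int
example_StructRoundTrip(struct rx_packet *pkt)
{
    struct rx_debugIn req, echo;

    memset(&req, 0, sizeof(req));
    req.type = htonl(RX_DEBUGI_GETSTATS);

    if (rx_SlowWritePacket(pkt, 0, (int)sizeof(req), (char *)&req)
	!= (int)sizeof(req))
	return -1;	/* packet too short and no continuation bufs */
    if (rx_SlowReadPacket(pkt, 0, (int)sizeof(echo), (char *)&echo)
	!= (int)sizeof(echo))
	return -1;
    return memcmp(&req, &echo, sizeof(req)) ? -1 : 0;
}
#endif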
252 int
253 rxi_AllocPackets(int class, int num_pkts, struct opr_queue * q)
254 {
255 struct opr_queue *c;
256
257 num_pkts = AllocPacketBufs(class, num_pkts, q);
258
259 for (opr_queue_Scan(q, c)) {
260 RX_PACKET_IOV_FULLINIT(opr_queue_Entry(c, struct rx_packet, entry));
261 }
262
263 return num_pkts;
264 }
265
266 #ifdef RX_ENABLE_TSFPQ
267 static int
268 AllocPacketBufs(int class, int num_pkts, struct opr_queue * q)
269 {
270 struct rx_ts_info_t * rx_ts_info;
271 int transfer;
272 SPLVAR;
273
274 RX_TS_INFO_GET(rx_ts_info);
275
276 transfer = num_pkts - rx_ts_info->_FPQ.len;
277 if (transfer > 0) {
278 NETPRI;
279 MUTEX_ENTER(&rx_freePktQ_lock);
280 transfer = MAX(transfer, rx_TSFPQGlobSize);
281 if (transfer > rx_nFreePackets) {
282 /* alloc enough for us, plus a few globs for other threads */
283 rxi_MorePacketsNoLock(transfer + 4 * rx_initSendWindow);
284 }
285
286 RX_TS_FPQ_GTOL2(rx_ts_info, transfer);
287
288 MUTEX_EXIT(&rx_freePktQ_lock);
289 USERPRI;
290 }
291
292 RX_TS_FPQ_QCHECKOUT(rx_ts_info, num_pkts, q);
293
294 return num_pkts;
295 }
296 #else /* RX_ENABLE_TSFPQ */
297 static int
298 AllocPacketBufs(int class, int num_pkts, struct opr_queue * q)
299 {
300 struct rx_packet *c;
301 int i;
302 #ifdef KERNEL
303 int overq = 0;
304 #endif
305 SPLVAR;
306
307 NETPRI;
308
309 MUTEX_ENTER(&rx_freePktQ_lock);
310
311 #ifdef KERNEL
312 for (; (num_pkts > 0) && (rxi_OverQuota2(class,num_pkts));
313 num_pkts--, overq++);
314
315 if (overq) {
316 rxi_NeedMorePackets = TRUE;
317 if (rx_stats_active) {
318 switch (class) {
319 case RX_PACKET_CLASS_RECEIVE:
320 rx_atomic_inc(&rx_stats.receivePktAllocFailures);
321 break;
322 case RX_PACKET_CLASS_SEND:
323 rx_atomic_inc(&rx_stats.sendPktAllocFailures);
324 break;
325 case RX_PACKET_CLASS_SPECIAL:
326 rx_atomic_inc(&rx_stats.specialPktAllocFailures);
327 break;
328 case RX_PACKET_CLASS_RECV_CBUF:
329 rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
330 break;
331 case RX_PACKET_CLASS_SEND_CBUF:
332 rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
333 break;
334 }
335 }
336 }
337
338 if (rx_nFreePackets < num_pkts)
339 num_pkts = rx_nFreePackets;
340
341 if (!num_pkts) {
342 rxi_NeedMorePackets = TRUE;
343 goto done;
344 }
345 #else /* KERNEL */
346 if (rx_nFreePackets < num_pkts) {
347 rxi_MorePacketsNoLock(MAX((num_pkts-rx_nFreePackets), 4 * rx_initSendWindow));
348 }
349 #endif /* KERNEL */
350
351 for (i=0, c=opr_queue_First(&rx_freePacketQueue, struct rx_packet, entry);
352 i < num_pkts;
353 i++, c=opr_queue_Next(&c->entry, struct rx_packet, entry)) {
354 RX_FPQ_MARK_USED(c);
355 }
356
357 opr_queue_SplitBeforeAppend(&rx_freePacketQueue, q, &c->entry);
358
359 rx_nFreePackets -= num_pkts;
360
361 #ifdef KERNEL
362 done:
363 #endif
364 MUTEX_EXIT(&rx_freePktQ_lock);
365
366 USERPRI;
367 return num_pkts;
368 }
369 #endif /* RX_ENABLE_TSFPQ */
370
371 /*
372 * Free a packet currently used as a continuation buffer
373 */
374 #ifdef RX_ENABLE_TSFPQ
375 /* num_pkts=0 means queue length is unknown */
376 int
377 rxi_FreePackets(int num_pkts, struct opr_queue * q)
378 {
379 struct rx_ts_info_t * rx_ts_info;
380 struct opr_queue *cursor, *store;
381 SPLVAR;
382
383 osi_Assert(num_pkts >= 0);
384 RX_TS_INFO_GET(rx_ts_info);
385
386 if (!num_pkts) {
387 for (opr_queue_ScanSafe(q, cursor, store)) {
388 num_pkts++;
389 rxi_FreeDataBufsTSFPQ(opr_queue_Entry(cursor, struct rx_packet,
390 entry), 2, 0);
391 }
392 } else {
393 for (opr_queue_ScanSafe(q, cursor, store)) {
394 rxi_FreeDataBufsTSFPQ(opr_queue_Entry(cursor, struct rx_packet,
395 entry), 2, 0);
396 }
397 }
398
399 if (num_pkts) {
400 RX_TS_FPQ_QCHECKIN(rx_ts_info, num_pkts, q);
401 }
402
403 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
404 NETPRI;
405 MUTEX_ENTER(&rx_freePktQ_lock);
406
407 RX_TS_FPQ_LTOG(rx_ts_info);
408
409 /* Wakeup anyone waiting for packets */
410 rxi_PacketsUnWait();
411
412 MUTEX_EXIT(&rx_freePktQ_lock);
413 USERPRI;
414 }
415
416 return num_pkts;
417 }
418 #else /* RX_ENABLE_TSFPQ */
419 /* num_pkts=0 means queue length is unknown */
420 int
421 rxi_FreePackets(int num_pkts, struct opr_queue *q)
422 {
423 struct opr_queue cbs;
424 struct opr_queue *cursor, *store;
425 int qlen = 0;
426 SPLVAR;
427
428 osi_Assert(num_pkts >= 0);
429 opr_queue_Init(&cbs);
430
431 if (!num_pkts) {
432 for (opr_queue_ScanSafe(q, cursor, store)) {
433 struct rx_packet *p
434 = opr_queue_Entry(cursor, struct rx_packet, entry);
435 if (p->niovecs > 2) {
436 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
437 }
438 RX_FPQ_MARK_FREE(p);
439 num_pkts++;
440 }
441 if (!num_pkts)
442 return 0;
443 } else {
444 for (opr_queue_ScanSafe(q, cursor, store)) {
445 struct rx_packet *p
446 = opr_queue_Entry(cursor, struct rx_packet, entry);
447
448 if (p->niovecs > 2) {
449 qlen += rxi_FreeDataBufsToQueue(p, 2, &cbs);
450 }
451 RX_FPQ_MARK_FREE(p);
452 }
453 }
454
455 if (qlen) {
456 opr_queue_SpliceAppend(q, &cbs);
457 qlen += num_pkts;
458 } else
459 qlen = num_pkts;
460
461 NETPRI;
462 MUTEX_ENTER(&rx_freePktQ_lock);
463
464 opr_queue_SpliceAppend(&rx_freePacketQueue, q);
465 rx_nFreePackets += qlen;
466
467 /* Wakeup anyone waiting for packets */
468 rxi_PacketsUnWait();
469
470 MUTEX_EXIT(&rx_freePktQ_lock);
471 USERPRI;
472
473 return num_pkts;
474 }
475 #endif /* RX_ENABLE_TSFPQ */
476
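/*
 * A minimal sketch (compiled out) of the batch interfaces above: pull a
 * handful of packets onto a private queue with rxi_AllocPackets and hand them
 * all back with rxi_FreePackets.  Passing 0 as the count asks the free
 * routine to walk the queue and count it itself.  The helper name and batch
 * size are hypothetical.
 */
#if 0
static void
example_BatchAllocFree(void)
{
    struct opr_queue q;
    int got;

    opr_queue_Init(&q);
    got = rxi_AllocPackets(RX_PACKET_CLASS_SEND, 8, &q);
    /* ... use the "got" packets chained on q ... */
    rxi_FreePackets(0, &q);
}
#endif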
477 /* this one is kind of awful.
478  * In rxkad, the packet has been shortened and is all ready for
479 * sending. All of a sudden, we discover we need some of that space back.
480 * This isn't terribly general, because it knows that the packets are only
481 * rounded up to the EBS (userdata + security header).
482 */
483 int
484 rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb)
485 {
486 int i;
487 i = p->niovecs - 1;
488 if (p->wirevec[i].iov_base == (caddr_t) p->localdata) {
489 if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) {
490 p->wirevec[i].iov_len += nb;
491 return 0;
492 }
493 } else {
494 if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) {
495 p->wirevec[i].iov_len += nb;
496 return 0;
497 }
498 }
499
500 return 0;
501 }
502
503 /* get sufficient space to store nb bytes of data (or more), and hook
504 * it into the supplied packet. Return nbytes<=0 if successful, otherwise
505 * returns the number of bytes >0 which it failed to come up with.
506 * Don't need to worry about locking on packet, since only
507  * one thread can manipulate one at a time. Locking on continuation
508  * packets is handled by AllocPacketBufs */
509 /* MTUXXX don't need to go through the for loop if we can trust niovecs */
510 int
511 rxi_AllocDataBuf(struct rx_packet *p, int nb, int class)
512 {
513 int i, nv;
514 struct opr_queue q, *cursor, *store;
515
516 /* compute the number of cbuf's we need */
517 nv = nb / RX_CBUFFERSIZE;
518 if ((nv * RX_CBUFFERSIZE) < nb)
519 nv++;
520 if ((nv + p->niovecs) > RX_MAXWVECS)
521 nv = RX_MAXWVECS - p->niovecs;
522 if (nv < 1)
523 return nb;
524
525 /* allocate buffers */
526 opr_queue_Init(&q);
527 nv = AllocPacketBufs(class, nv, &q);
528
529 /* setup packet iovs */
530 i = p ->niovecs;
531 for (opr_queue_ScanSafe(&q, cursor, store)) {
532 struct rx_packet *cb
533 = opr_queue_Entry(cursor, struct rx_packet, entry);
534
535 opr_queue_Remove(&cb->entry);
536 p->wirevec[i].iov_base = (caddr_t) cb->localdata;
537 p->wirevec[i].iov_len = RX_CBUFFERSIZE;
538 i++;
539 }
540
541 nb -= (nv * RX_CBUFFERSIZE);
542 p->length += (nv * RX_CBUFFERSIZE);
543 p->niovecs += nv;
544
545 return nb;
546 }
547
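/*
 * A minimal sketch (compiled out) of the contract above: rxi_AllocDataBuf
 * returns the shortfall, so a positive result means the packet could not be
 * grown by the full amount (either RX_MAXWVECS was reached or no continuation
 * buffers were available).  The helper name is hypothetical.
 */
#if 0
static int
example_GrowPacket(struct rx_packet *pkt, int needed)
{
    int shortfall = rxi_AllocDataBuf(pkt, needed, RX_PACKET_CLASS_SEND_CBUF);

    return (shortfall > 0) ? -1 : 0;	/* -1: could not satisfy the request */
}
#endif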
548 /**
549 * Register allocated packets.
550 *
551 * @param[in] addr array of packets
552 * @param[in] npkt number of packets
553 *
554 * @return none
555 */
556 static void
557 registerPackets(struct rx_packet *addr, afs_uint32 npkt)
558 {
559 struct rx_mallocedPacket *mp;
560
561 mp = osi_Alloc(sizeof(*mp));
562
563 osi_Assert(mp != NULL);
564 memset(mp, 0, sizeof(*mp));
565
566 mp->addr = addr;
567 mp->size = npkt * sizeof(struct rx_packet);
568 osi_Assert(npkt <= MAX_AFS_UINT32 / sizeof(struct rx_packet));
569
570 MUTEX_ENTER(&rx_mallocedPktQ_lock);
571 opr_queue_Append(&rx_mallocedPacketQueue, &mp->entry);
572 MUTEX_EXIT(&rx_mallocedPktQ_lock);
573 }
574
575 /* Add more packet buffers */
576 #ifdef RX_ENABLE_TSFPQ
577 void
578 rxi_MorePackets(int apackets)
579 {
580 struct rx_packet *p, *e;
581 struct rx_ts_info_t * rx_ts_info;
582 int getme;
583 SPLVAR;
584
585 getme = apackets * sizeof(struct rx_packet);
586 p = osi_Alloc(getme);
587 osi_Assert(p);
588 registerPackets(p, apackets);
589
590 PIN(p, getme); /* XXXXX */
591 memset(p, 0, getme);
592 RX_TS_INFO_GET(rx_ts_info);
593
594 RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
595 /* TSFPQ patch also needs to keep track of total packets */
596
597 MUTEX_ENTER(&rx_packets_mutex);
598 rx_nPackets += apackets;
599 RX_TS_FPQ_COMPUTE_LIMITS;
600 MUTEX_EXIT(&rx_packets_mutex);
601
602 for (e = p + apackets; p < e; p++) {
603 RX_PACKET_IOV_INIT(p);
604 p->niovecs = 2;
605
606 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
607
608 NETPRI;
609 MUTEX_ENTER(&rx_freePktQ_lock);
610 #ifdef RXDEBUG_PACKET
611 p->packetId = rx_packet_id++;
612 p->allNextp = rx_mallocedP;
613 #endif /* RXDEBUG_PACKET */
614 rx_mallocedP = p;
615 MUTEX_EXIT(&rx_freePktQ_lock);
616 USERPRI;
617 }
618 rx_ts_info->_FPQ.delta += apackets;
619
620 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
621 NETPRI;
622 MUTEX_ENTER(&rx_freePktQ_lock);
623
624 RX_TS_FPQ_LTOG(rx_ts_info);
625 rxi_NeedMorePackets = FALSE;
626 rxi_PacketsUnWait();
627
628 MUTEX_EXIT(&rx_freePktQ_lock);
629 USERPRI;
630 }
631 }
632 #else /* RX_ENABLE_TSFPQ */
633 void
634 rxi_MorePackets(int apackets)
635 {
636 struct rx_packet *p, *e;
637 int getme;
638 SPLVAR;
639
640 getme = apackets * sizeof(struct rx_packet);
641 p = osi_Alloc(getme);
642 osi_Assert(p);
643 registerPackets(p, apackets);
644
645 PIN(p, getme); /* XXXXX */
646 memset(p, 0, getme);
647 NETPRI;
648 MUTEX_ENTER(&rx_freePktQ_lock);
649
650 for (e = p + apackets; p < e; p++) {
651 RX_PACKET_IOV_INIT(p);
652 #ifdef RX_TRACK_PACKETS
653 p->flags |= RX_PKTFLAG_FREE;
654 #endif
655 p->niovecs = 2;
656
657 opr_queue_Append(&rx_freePacketQueue, &p->entry);
658 #ifdef RXDEBUG_PACKET
659 p->packetId = rx_packet_id++;
660 p->allNextp = rx_mallocedP;
661 #endif /* RXDEBUG_PACKET */
662 rx_mallocedP = p;
663 }
664
665 rx_nPackets += apackets;
666 rx_nFreePackets += apackets;
667 rxi_NeedMorePackets = FALSE;
668 rxi_PacketsUnWait();
669
670 MUTEX_EXIT(&rx_freePktQ_lock);
671 USERPRI;
672 }
673 #endif /* RX_ENABLE_TSFPQ */
674
675 #ifdef RX_ENABLE_TSFPQ
676 void
677 rxi_MorePacketsTSFPQ(int apackets, int flush_global, int num_keep_local)
678 {
679 struct rx_packet *p, *e;
680 struct rx_ts_info_t * rx_ts_info;
681 int getme;
682 SPLVAR;
683
684 getme = apackets * sizeof(struct rx_packet);
685 p = osi_Alloc(getme);
686 registerPackets(p, apackets);
687
688 PIN(p, getme); /* XXXXX */
689 memset(p, 0, getme);
690 RX_TS_INFO_GET(rx_ts_info);
691
692 RX_TS_FPQ_LOCAL_ALLOC(rx_ts_info,apackets);
693 /* TSFPQ patch also needs to keep track of total packets */
694 MUTEX_ENTER(&rx_packets_mutex);
695 rx_nPackets += apackets;
696 RX_TS_FPQ_COMPUTE_LIMITS;
697 MUTEX_EXIT(&rx_packets_mutex);
698
699 for (e = p + apackets; p < e; p++) {
700 RX_PACKET_IOV_INIT(p);
701 p->niovecs = 2;
702 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
703
704 NETPRI;
705 MUTEX_ENTER(&rx_freePktQ_lock);
706 #ifdef RXDEBUG_PACKET
707 p->packetId = rx_packet_id++;
708 p->allNextp = rx_mallocedP;
709 #endif /* RXDEBUG_PACKET */
710 rx_mallocedP = p;
711 MUTEX_EXIT(&rx_freePktQ_lock);
712 USERPRI;
713 }
714 rx_ts_info->_FPQ.delta += apackets;
715
716 if (flush_global &&
717 (num_keep_local < apackets)) {
718 NETPRI;
719 MUTEX_ENTER(&rx_freePktQ_lock);
720
721 RX_TS_FPQ_LTOG2(rx_ts_info, (apackets - num_keep_local));
722 rxi_NeedMorePackets = FALSE;
723 rxi_PacketsUnWait();
724
725 MUTEX_EXIT(&rx_freePktQ_lock);
726 USERPRI;
727 }
728 }
729 #endif /* RX_ENABLE_TSFPQ */
730
731 #ifndef KERNEL
732 /* Add more packet buffers */
733 static void
734 rxi_MorePacketsNoLock(int apackets)
735 {
736 #ifdef RX_ENABLE_TSFPQ
737 struct rx_ts_info_t * rx_ts_info;
738 #endif /* RX_ENABLE_TSFPQ */
739 struct rx_packet *p, *e;
740 int getme;
741
742 /* allocate enough packets that 1/4 of the packets will be able
743 * to hold maximal amounts of data */
744 apackets += (apackets / 4)
745 * ((rx_maxJumboRecvSize - RX_FIRSTBUFFERSIZE) / RX_CBUFFERSIZE);
746 do {
747 getme = apackets * sizeof(struct rx_packet);
748 p = osi_Alloc(getme);
749 if (p == NULL) {
750 apackets -= apackets / 4;
751 osi_Assert(apackets > 0);
752 }
753 } while(p == NULL);
754 memset(p, 0, getme);
755 registerPackets(p, apackets);
756
757 #ifdef RX_ENABLE_TSFPQ
758 RX_TS_INFO_GET(rx_ts_info);
759 RX_TS_FPQ_GLOBAL_ALLOC(rx_ts_info,apackets);
760 #endif /* RX_ENABLE_TSFPQ */
761
762 for (e = p + apackets; p < e; p++) {
763 RX_PACKET_IOV_INIT(p);
764 #ifdef RX_TRACK_PACKETS
765 p->flags |= RX_PKTFLAG_FREE;
766 #endif
767 p->niovecs = 2;
768
769 opr_queue_Append(&rx_freePacketQueue, &p->entry);
770 #ifdef RXDEBUG_PACKET
771 p->packetId = rx_packet_id++;
772 p->allNextp = rx_mallocedP;
773 #endif /* RXDEBUG_PACKET */
774 rx_mallocedP = p;
775 }
776
777 rx_nFreePackets += apackets;
778 MUTEX_ENTER(&rx_packets_mutex);
779 rx_nPackets += apackets;
780 #ifdef RX_ENABLE_TSFPQ
781 RX_TS_FPQ_COMPUTE_LIMITS;
782 #endif /* RX_ENABLE_TSFPQ */
783 MUTEX_EXIT(&rx_packets_mutex);
784 rxi_NeedMorePackets = FALSE;
785 rxi_PacketsUnWait();
786 }
787 #endif /* !KERNEL */
788
789 void
790 rxi_FreeAllPackets(void)
791 {
792 struct rx_mallocedPacket *mp;
793
794 MUTEX_ENTER(&rx_mallocedPktQ_lock);
795
796 while (!opr_queue_IsEmpty(&rx_mallocedPacketQueue)) {
797 mp = opr_queue_First(&rx_mallocedPacketQueue,
798 struct rx_mallocedPacket, entry);
799 opr_queue_Remove(&mp->entry);
800 osi_Free(mp->addr, mp->size);
801 UNPIN(mp->addr, mp->size);
802 osi_Free(mp, sizeof(*mp));
803 }
804 MUTEX_EXIT(&rx_mallocedPktQ_lock);
805 }
806
807 #ifdef RX_ENABLE_TSFPQ
808 static void
809 rxi_AdjustLocalPacketsTSFPQ(int num_keep_local, int allow_overcommit)
810 {
811 struct rx_ts_info_t * rx_ts_info;
812 int xfer;
813 SPLVAR;
814
815 RX_TS_INFO_GET(rx_ts_info);
816
817 if (num_keep_local != rx_ts_info->_FPQ.len) {
818 NETPRI;
819 MUTEX_ENTER(&rx_freePktQ_lock);
820 if (num_keep_local < rx_ts_info->_FPQ.len) {
821 xfer = rx_ts_info->_FPQ.len - num_keep_local;
822 RX_TS_FPQ_LTOG2(rx_ts_info, xfer);
823 rxi_PacketsUnWait();
824 } else {
825 xfer = num_keep_local - rx_ts_info->_FPQ.len;
826 if ((num_keep_local > rx_TSFPQLocalMax) && !allow_overcommit)
827 xfer = rx_TSFPQLocalMax - rx_ts_info->_FPQ.len;
828 if (rx_nFreePackets < xfer) {
829 rxi_MorePacketsNoLock(MAX(xfer - rx_nFreePackets, 4 * rx_initSendWindow));
830 }
831 RX_TS_FPQ_GTOL2(rx_ts_info, xfer);
832 }
833 MUTEX_EXIT(&rx_freePktQ_lock);
834 USERPRI;
835 }
836 }
837
838 void
839 rxi_FlushLocalPacketsTSFPQ(void)
840 {
841 rxi_AdjustLocalPacketsTSFPQ(0, 0);
842 }
843 #endif /* RX_ENABLE_TSFPQ */
844
845 /* Allocate more packets iff we need more continuation buffers */
846 /* In kernel, can't page in memory with interrupts disabled, so we
847 * don't use the event mechanism. */
848 void
849 rx_CheckPackets(void)
850 {
851 if (rxi_NeedMorePackets) {
852 rxi_MorePackets(rx_maxSendWindow);
853 }
854 }
855
856 /* In the packet freeing routine below, the assumption is that
857 we want all of the packets to be used equally frequently, so that we
858 don't get packet buffers paging out. It would be just as valid to
859 assume that we DO want them to page out if not many are being used.
860 In any event, we assume the former, and append the packets to the end
861 of the free list. */
862 /* This explanation is bogus. The free list doesn't remain in any kind of
863    useful order for long: the packets in use get pretty much randomly scattered
864 across all the pages. In order to permit unused {packets,bufs} to page out, they
865 must be stored so that packets which are adjacent in memory are adjacent in the
866 free list. An array springs rapidly to mind.
867 */
868
869 /* Actually free the packet p. */
870 #ifndef RX_ENABLE_TSFPQ
871 static void
872 rxi_FreePacketNoLock(struct rx_packet *p)
873 {
874 dpf(("Free %"AFS_PTR_FMT"\n", p));
875
876 RX_FPQ_MARK_FREE(p);
877 rx_nFreePackets++;
878 opr_queue_Append(&rx_freePacketQueue, &p->entry);
879 }
880 #endif /* RX_ENABLE_TSFPQ */
881
882 #ifdef RX_ENABLE_TSFPQ
883 static void
884 rxi_FreePacketTSFPQ(struct rx_packet *p, int flush_global)
885 {
886 struct rx_ts_info_t * rx_ts_info;
887 dpf(("Free %"AFS_PTR_FMT"\n", p));
888
889 RX_TS_INFO_GET(rx_ts_info);
890 RX_TS_FPQ_CHECKIN(rx_ts_info,p);
891
892 if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
893 NETPRI;
894 MUTEX_ENTER(&rx_freePktQ_lock);
895
896 RX_TS_FPQ_LTOG(rx_ts_info);
897
898 /* Wakeup anyone waiting for packets */
899 rxi_PacketsUnWait();
900
901 MUTEX_EXIT(&rx_freePktQ_lock);
902 USERPRI;
903 }
904 }
905 #endif /* RX_ENABLE_TSFPQ */
906
907 /*
908 * free continuation buffers off a packet into a queue
909 *
910 * [IN] p -- packet from which continuation buffers will be freed
911 * [IN] first -- iovec offset of first continuation buffer to free
912 * [IN] q -- queue into which continuation buffers will be chained
913 *
914 * returns:
915 * number of continuation buffers freed
916 */
917 #ifndef RX_ENABLE_TSFPQ
918 static int
919 rxi_FreeDataBufsToQueue(struct rx_packet *p, afs_uint32 first, struct opr_queue * q)
920 {
921 struct iovec *iov;
922 struct rx_packet * cb;
923 int count = 0;
924
925 for (first = MAX(2, first); first < p->niovecs; first++, count++) {
926 iov = &p->wirevec[first];
927 if (!iov->iov_base)
928 osi_Panic("rxi_FreeDataBufsToQueue: unexpected NULL iov");
929 cb = RX_CBUF_TO_PACKET(iov->iov_base, p);
930 RX_FPQ_MARK_FREE(cb);
931 opr_queue_Append(q, &cb->entry);
932 }
933 p->length = 0;
934 p->niovecs = 0;
935
936 return count;
937 }
938
939 /*
940 * free packet continuation buffers into the global free packet pool
941 *
942 * [IN] p -- packet from which to free continuation buffers
943 * [IN] first -- iovec offset of first continuation buffer to free
944 *
945 * returns:
946 * zero always
947 */
948 static int
949 rxi_FreeDataBufsNoLock(struct rx_packet *p, afs_uint32 first)
950 {
951 struct iovec *iov;
952
953 for (first = MAX(2, first); first < p->niovecs; first++) {
954 iov = &p->wirevec[first];
955 if (!iov->iov_base)
956 osi_Panic("rxi_FreeDataBufsNoLock: unexpected NULL iov");
957 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
958 }
959 p->length = 0;
960 p->niovecs = 0;
961
962 return 0;
963 }
964
965 #else
966
967 /*
968 * free packet continuation buffers into the thread-local free pool
969 *
970 * [IN] p -- packet from which continuation buffers will be freed
971 * [IN] first -- iovec offset of first continuation buffer to free
972 * any value less than 2, the min number of iovecs,
973 * is treated as if it is 2.
974 * [IN] flush_global -- if nonzero, we will flush overquota packets to the
975 * global free pool before returning
976 *
977 * returns:
978 * zero always
979 */
980 static int
981 rxi_FreeDataBufsTSFPQ(struct rx_packet *p, afs_uint32 first, int flush_global)
982 {
983 struct iovec *iov;
984 struct rx_ts_info_t * rx_ts_info;
985
986 RX_TS_INFO_GET(rx_ts_info);
987
988 for (first = MAX(2, first); first < p->niovecs; first++) {
989 iov = &p->wirevec[first];
990 if (!iov->iov_base)
991 osi_Panic("rxi_FreeDataBufsTSFPQ: unexpected NULL iov");
992 RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
993 }
994 p->length = 0;
995 p->niovecs = 0;
996
997 if (flush_global && (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax)) {
998 NETPRI;
999 MUTEX_ENTER(&rx_freePktQ_lock);
1000
1001 RX_TS_FPQ_LTOG(rx_ts_info);
1002
1003 /* Wakeup anyone waiting for packets */
1004 rxi_PacketsUnWait();
1005
1006 MUTEX_EXIT(&rx_freePktQ_lock);
1007 USERPRI;
1008 }
1009 return 0;
1010 }
1011 #endif /* RX_ENABLE_TSFPQ */
1012
1013 int rxi_nBadIovecs = 0;
1014
1015 /* rxi_RestoreDataBufs
1016 *
1017 * Restore the correct sizes to the iovecs. Called when reusing a packet
1018 * for reading off the wire.
1019 */
1020 void
1021 rxi_RestoreDataBufs(struct rx_packet *p)
1022 {
1023 unsigned int i;
1024 struct iovec *iov;
1025
1026 RX_PACKET_IOV_INIT(p);
1027
1028 for (i = 2, iov = &p->wirevec[2]; i < p->niovecs; i++, iov++) {
1029 if (!iov->iov_base) {
1030 rxi_nBadIovecs++;
1031 p->niovecs = i;
1032 break;
1033 }
1034 iov->iov_len = RX_CBUFFERSIZE;
1035 }
1036 }
1037
1038 #ifdef RX_ENABLE_TSFPQ
1039 int
1040 rxi_TrimDataBufs(struct rx_packet *p, int first)
1041 {
1042 int length;
1043 struct iovec *iov, *end;
1044 struct rx_ts_info_t * rx_ts_info;
1045 SPLVAR;
1046
1047 if (first != 1)
1048 osi_Panic("TrimDataBufs 1: first must be 1");
1049
1050 /* Skip over continuation buffers containing message data */
1051 iov = &p->wirevec[2];
1052 end = iov + (p->niovecs - 2);
1053 length = p->length - p->wirevec[1].iov_len;
1054 for (; iov < end && length > 0; iov++) {
1055 if (!iov->iov_base)
1056 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
1057 length -= iov->iov_len;
1058 }
1059
1060 /* iov now points to the first empty data buffer. */
1061 if (iov >= end)
1062 return 0;
1063
1064 RX_TS_INFO_GET(rx_ts_info);
1065 for (; iov < end; iov++) {
1066 if (!iov->iov_base)
1067 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1068 RX_TS_FPQ_CHECKIN(rx_ts_info,RX_CBUF_TO_PACKET(iov->iov_base, p));
1069 p->niovecs--;
1070 }
1071 if (rx_ts_info->_FPQ.len > rx_TSFPQLocalMax) {
1072 NETPRI;
1073 MUTEX_ENTER(&rx_freePktQ_lock);
1074
1075 RX_TS_FPQ_LTOG(rx_ts_info);
1076 rxi_PacketsUnWait();
1077
1078 MUTEX_EXIT(&rx_freePktQ_lock);
1079 USERPRI;
1080 }
1081
1082 return 0;
1083 }
1084 #else /* RX_ENABLE_TSFPQ */
1085 int
1086 rxi_TrimDataBufs(struct rx_packet *p, int first)
1087 {
1088 int length;
1089 struct iovec *iov, *end;
1090 SPLVAR;
1091
1092 if (first != 1)
1093 osi_Panic("TrimDataBufs 1: first must be 1");
1094
1095 /* Skip over continuation buffers containing message data */
1096 iov = &p->wirevec[2];
1097 end = iov + (p->niovecs - 2);
1098 length = p->length - p->wirevec[1].iov_len;
1099 for (; iov < end && length > 0; iov++) {
1100 if (!iov->iov_base)
1101 osi_Panic("TrimDataBufs 3: vecs 1-niovecs must not be NULL");
1102 length -= iov->iov_len;
1103 }
1104
1105 /* iov now points to the first empty data buffer. */
1106 if (iov >= end)
1107 return 0;
1108
1109 NETPRI;
1110 MUTEX_ENTER(&rx_freePktQ_lock);
1111
1112 for (; iov < end; iov++) {
1113 if (!iov->iov_base)
1114 osi_Panic("TrimDataBufs 4: vecs 2-niovecs must not be NULL");
1115 rxi_FreePacketNoLock(RX_CBUF_TO_PACKET(iov->iov_base, p));
1116 p->niovecs--;
1117 }
1118 rxi_PacketsUnWait();
1119
1120 MUTEX_EXIT(&rx_freePktQ_lock);
1121 USERPRI;
1122
1123 return 0;
1124 }
1125 #endif /* RX_ENABLE_TSFPQ */
1126
1127 /* Free the packet p. P is assumed not to be on any queue, i.e.
1128 * remove it yourself first if you call this routine. */
1129 #ifdef RX_ENABLE_TSFPQ
1130 void
1131 rxi_FreePacket(struct rx_packet *p)
1132 {
1133 rxi_FreeDataBufsTSFPQ(p, 2, 0);
1134 rxi_FreePacketTSFPQ(p, RX_TS_FPQ_FLUSH_GLOBAL);
1135 }
1136 #else /* RX_ENABLE_TSFPQ */
1137 void
1138 rxi_FreePacket(struct rx_packet *p)
1139 {
1140 SPLVAR;
1141
1142 NETPRI;
1143 MUTEX_ENTER(&rx_freePktQ_lock);
1144
1145 rxi_FreeDataBufsNoLock(p, 2);
1146 rxi_FreePacketNoLock(p);
1147 /* Wakeup anyone waiting for packets */
1148 rxi_PacketsUnWait();
1149
1150 MUTEX_EXIT(&rx_freePktQ_lock);
1151 USERPRI;
1152 }
1153 #endif /* RX_ENABLE_TSFPQ */
1154
1155 /* rxi_AllocPacket sets up p->length so it reflects the number of
1156 * bytes in the packet at this point, **not including** the header.
1157  * The header is absolutely necessary; besides, this is the way the
1158 * length field is usually used */
1159 #ifdef RX_ENABLE_TSFPQ
1160 static struct rx_packet *
1161 rxi_AllocPacketNoLock(int class)
1162 {
1163 struct rx_packet *p;
1164 struct rx_ts_info_t * rx_ts_info;
1165
1166 RX_TS_INFO_GET(rx_ts_info);
1167
1168 if (rx_stats_active)
1169 rx_atomic_inc(&rx_stats.packetRequests);
1170 if (opr_queue_IsEmpty(&rx_ts_info->_FPQ.queue)) {
1171
1172 #ifdef KERNEL
1173 if (opr_queue_IsEmpty(&rx_freePacketQueue))
1174 osi_Panic("rxi_AllocPacket error");
1175 #else /* KERNEL */
1176 if (opr_queue_IsEmpty(&rx_freePacketQueue))
1177 rxi_MorePacketsNoLock(rx_maxSendWindow);
1178 #endif /* KERNEL */
1179
1180
1181 RX_TS_FPQ_GTOL(rx_ts_info);
1182 }
1183
1184 RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1185
1186 dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
1187
1188
1189 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1190 * order to truncate outbound packets. In the near future, may need
1191 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1192 */
1193 RX_PACKET_IOV_FULLINIT(p);
1194 return p;
1195 }
1196 #else /* RX_ENABLE_TSFPQ */
1197 static struct rx_packet *
1198 rxi_AllocPacketNoLock(int class)
1199 {
1200 struct rx_packet *p;
1201
1202 #ifdef KERNEL
1203 if (rxi_OverQuota(class)) {
1204 rxi_NeedMorePackets = TRUE;
1205 if (rx_stats_active) {
1206 switch (class) {
1207 case RX_PACKET_CLASS_RECEIVE:
1208 rx_atomic_inc(&rx_stats.receivePktAllocFailures);
1209 break;
1210 case RX_PACKET_CLASS_SEND:
1211 rx_atomic_inc(&rx_stats.sendPktAllocFailures);
1212 break;
1213 case RX_PACKET_CLASS_SPECIAL:
1214 rx_atomic_inc(&rx_stats.specialPktAllocFailures);
1215 break;
1216 case RX_PACKET_CLASS_RECV_CBUF:
1217 rx_atomic_inc(&rx_stats.receiveCbufPktAllocFailures);
1218 break;
1219 case RX_PACKET_CLASS_SEND_CBUF:
1220 rx_atomic_inc(&rx_stats.sendCbufPktAllocFailures);
1221 break;
1222 }
1223 }
1224 return (struct rx_packet *)0;
1225 }
1226 #endif /* KERNEL */
1227
1228 if (rx_stats_active)
1229 rx_atomic_inc(&rx_stats.packetRequests);
1230
1231 #ifdef KERNEL
1232 if (opr_queue_IsEmpty(&rx_freePacketQueue))
1233 osi_Panic("rxi_AllocPacket error");
1234 #else /* KERNEL */
1235 if (opr_queue_IsEmpty(&rx_freePacketQueue))
1236 rxi_MorePacketsNoLock(rx_maxSendWindow);
1237 #endif /* KERNEL */
1238
1239 rx_nFreePackets--;
1240 p = opr_queue_First(&rx_freePacketQueue, struct rx_packet, entry);
1241 opr_queue_Remove(&p->entry);
1242 RX_FPQ_MARK_USED(p);
1243
1244 dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
1245
1246
1247 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1248 * order to truncate outbound packets. In the near future, may need
1249 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1250 */
1251 RX_PACKET_IOV_FULLINIT(p);
1252 return p;
1253 }
1254 #endif /* RX_ENABLE_TSFPQ */
1255
1256 #ifdef RX_ENABLE_TSFPQ
1257 static struct rx_packet *
1258 rxi_AllocPacketTSFPQ(int class, int pull_global)
1259 {
1260 struct rx_packet *p;
1261 struct rx_ts_info_t * rx_ts_info;
1262
1263 RX_TS_INFO_GET(rx_ts_info);
1264
1265 if (rx_stats_active)
1266 rx_atomic_inc(&rx_stats.packetRequests);
1267 if (pull_global && opr_queue_IsEmpty(&rx_ts_info->_FPQ.queue)) {
1268 MUTEX_ENTER(&rx_freePktQ_lock);
1269
1270 if (opr_queue_IsEmpty(&rx_freePacketQueue))
1271 rxi_MorePacketsNoLock(rx_maxSendWindow);
1272
1273 RX_TS_FPQ_GTOL(rx_ts_info);
1274
1275 MUTEX_EXIT(&rx_freePktQ_lock);
1276 } else if (opr_queue_IsEmpty(&rx_ts_info->_FPQ.queue)) {
1277 return NULL;
1278 }
1279
1280 RX_TS_FPQ_CHECKOUT(rx_ts_info,p);
1281
1282 dpf(("Alloc %"AFS_PTR_FMT", class %d\n", p, class));
1283
1284 /* have to do this here because rx_FlushWrite fiddles with the iovs in
1285 * order to truncate outbound packets. In the near future, may need
1286 * to allocate bufs from a static pool here, and/or in AllocSendPacket
1287 */
1288 RX_PACKET_IOV_FULLINIT(p);
1289 return p;
1290 }
1291 #endif /* RX_ENABLE_TSFPQ */
1292
1293 #ifdef RX_ENABLE_TSFPQ
1294 struct rx_packet *
1295 rxi_AllocPacket(int class)
1296 {
1297 struct rx_packet *p;
1298
1299 p = rxi_AllocPacketTSFPQ(class, RX_TS_FPQ_PULL_GLOBAL);
1300 return p;
1301 }
1302 #else /* RX_ENABLE_TSFPQ */
1303 struct rx_packet *
1304 rxi_AllocPacket(int class)
1305 {
1306 struct rx_packet *p;
1307
1308 MUTEX_ENTER(&rx_freePktQ_lock);
1309 p = rxi_AllocPacketNoLock(class);
1310 MUTEX_EXIT(&rx_freePktQ_lock);
1311 return p;
1312 }
1313 #endif /* RX_ENABLE_TSFPQ */
1314
1315 /* This guy comes up with as many buffers as it {takes,can get} given
1316 * the MTU for this call. It also sets the packet length before
1317 * returning. caution: this is often called at NETPRI
1318 * Called with call locked.
1319 */
1320 struct rx_packet *
1321 rxi_AllocSendPacket(struct rx_call *call, int want)
1322 {
1323 struct rx_packet *p = (struct rx_packet *)0;
1324 int mud;
1325 unsigned delta;
1326
1327 SPLVAR;
1328 mud = call->MTU - RX_HEADER_SIZE;
1329 delta =
1330 rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) +
1331 rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call));
1332
1333 #ifdef RX_ENABLE_TSFPQ
1334 if ((p = rxi_AllocPacketTSFPQ(RX_PACKET_CLASS_SEND, 0))) {
1335 want += delta;
1336 want = MIN(want, mud);
1337
1338 if ((unsigned)want > p->length)
1339 (void)rxi_AllocDataBuf(p, (want - p->length),
1340 RX_PACKET_CLASS_SEND_CBUF);
1341
1342 if (p->length > mud)
1343 p->length = mud;
1344
1345 if (delta >= p->length) {
1346 rxi_FreePacket(p);
1347 p = NULL;
1348 } else {
1349 p->length -= delta;
1350 }
1351 return p;
1352 }
1353 #endif /* RX_ENABLE_TSFPQ */
1354
1355 while (!(call->error)) {
1356 MUTEX_ENTER(&rx_freePktQ_lock);
1357 /* if an error occurred, or we get the packet we want, we're done */
1358 if ((p = rxi_AllocPacketNoLock(RX_PACKET_CLASS_SEND))) {
1359 MUTEX_EXIT(&rx_freePktQ_lock);
1360
1361 want += delta;
1362 want = MIN(want, mud);
1363
1364 if ((unsigned)want > p->length)
1365 (void)rxi_AllocDataBuf(p, (want - p->length),
1366 RX_PACKET_CLASS_SEND_CBUF);
1367
1368 if (p->length > mud)
1369 p->length = mud;
1370
1371 if (delta >= p->length) {
1372 rxi_FreePacket(p);
1373 p = NULL;
1374 } else {
1375 p->length -= delta;
1376 }
1377 break;
1378 }
1379
1380 /* no error occurred, and we didn't get a packet, so we sleep.
1381 * At this point, we assume that packets will be returned
1382 * sooner or later, as packets are acknowledged, and so we
1383 * just wait. */
1384 NETPRI;
1385 call->flags |= RX_CALL_WAIT_PACKETS;
1386 CALL_HOLD(call, RX_CALL_REFCOUNT_PACKET);
1387 MUTEX_EXIT(&call->lock);
1388 rx_waitingForPackets = 1;
1389
1390 #ifdef RX_ENABLE_LOCKS
1391 CV_WAIT(&rx_waitingForPackets_cv, &rx_freePktQ_lock);
1392 #else
1393 osi_rxSleep(&rx_waitingForPackets);
1394 #endif
1395 MUTEX_EXIT(&rx_freePktQ_lock);
1396 MUTEX_ENTER(&call->lock);
1397 CALL_RELE(call, RX_CALL_REFCOUNT_PACKET);
1398 call->flags &= ~RX_CALL_WAIT_PACKETS;
1399 USERPRI;
1400 }
1401
1402 return p;
1403 }
1404
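/*
 * A minimal sketch (compiled out) of calling the allocator above from the
 * send path: the call must be locked, and the returned packet's length field
 * already has the security header/trailer overhead subtracted, so it tells
 * the caller how much user data will actually fit.  The helper name is
 * hypothetical.
 */
#if 0
static struct rx_packet *
example_GetSendPacket(struct rx_call *call, int nbytes)
{
    struct rx_packet *p = rxi_AllocSendPacket(call, nbytes);

    if (p != NULL) {
	/* p->length may be smaller than nbytes if the call MTU is the limit */
    }
    return p;
}
#endif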
1405 #ifndef KERNEL
1406 #ifdef AFS_NT40_ENV
1407 /* Windows does not use file descriptors. */
1408 #define CountFDs(amax) 0
1409 #else
1410 /* count the number of used FDs */
1411 static int
1412 CountFDs(int amax)
1413 {
1414 struct stat tstat;
1415 int i, code;
1416 int count;
1417
1418 count = 0;
1419 for (i = 0; i < amax; i++) {
1420 code = fstat(i, &tstat);
1421 if (code == 0)
1422 count++;
1423 }
1424 return count;
1425 }
1426 #endif /* AFS_NT40_ENV */
1427 #else /* KERNEL */
1428
1429 #define CountFDs(amax) amax
1430
1431 #endif /* KERNEL */
1432
1433 #if !defined(KERNEL) || defined(UKERNEL)
1434
1435 /* This function reads a single packet from the interface into the
1436 * supplied packet buffer (*p). Return 0 if the packet is bogus. The
1437 * (host,port) of the sender are stored in the supplied variables, and
1438 * the data length of the packet is stored in the packet structure.
1439 * The header is decoded. */
1440 int
1441 rxi_ReadPacket(osi_socket socket, struct rx_packet *p, afs_uint32 * host,
1442 u_short * port)
1443 {
1444 struct sockaddr_in from;
1445 int nbytes;
1446 afs_int32 rlen;
1447 afs_uint32 tlen, savelen;
1448 struct msghdr msg;
1449 rx_computelen(p, tlen);
1450 rx_SetDataSize(p, tlen); /* this is the size of the user data area */
1451
1452 tlen += RX_HEADER_SIZE; /* now this is the size of the entire packet */
1453 rlen = rx_maxJumboRecvSize; /* this is what I am advertising. Only check
1454 * it once in order to avoid races. */
1455 tlen = rlen - tlen;
1456 if (tlen > 0) {
1457 tlen = rxi_AllocDataBuf(p, tlen, RX_PACKET_CLASS_SEND_CBUF);
1458 if (tlen > 0) {
1459 tlen = rlen - tlen;
1460 } else
1461 tlen = rlen;
1462 } else
1463 tlen = rlen;
1464
1465     /* Extend the last iovec for padding; this is just to make sure that the
1466 * read doesn't return more data than we expect, and is done to get around
1467 * our problems caused by the lack of a length field in the rx header.
1468 * Use the extra buffer that follows the localdata in each packet
1469 * structure. */
1470 savelen = p->wirevec[p->niovecs - 1].iov_len;
1471 p->wirevec[p->niovecs - 1].iov_len += RX_EXTRABUFFERSIZE;
1472
1473 memset(&msg, 0, sizeof(msg));
1474 msg.msg_name = (char *)&from;
1475 msg.msg_namelen = sizeof(struct sockaddr_in);
1476 msg.msg_iov = p->wirevec;
1477 msg.msg_iovlen = p->niovecs;
1478 nbytes = rxi_Recvmsg(socket, &msg, 0);
1479
1480 /* restore the vec to its correct state */
1481 p->wirevec[p->niovecs - 1].iov_len = savelen;
1482
1483 p->length = (u_short)(nbytes - RX_HEADER_SIZE);
1484 if (nbytes < 0 || (nbytes > tlen) || (p->length & 0x8000)) { /* Bogus packet */
1485 if (nbytes < 0 && errno == EWOULDBLOCK) {
1486 if (rx_stats_active)
1487 rx_atomic_inc(&rx_stats.noPacketOnRead);
1488 } else if (nbytes <= 0) {
1489 if (rx_stats_active) {
1490 rx_atomic_inc(&rx_stats.bogusPacketOnRead);
1491 rx_stats.bogusHost = from.sin_addr.s_addr;
1492 }
1493 dpf(("B: bogus packet from [%x,%d] nb=%d\n", ntohl(from.sin_addr.s_addr),
1494 ntohs(from.sin_port), nbytes));
1495 }
1496 return 0;
1497 }
1498 #ifdef RXDEBUG
1499 else if ((rx_intentionallyDroppedOnReadPer100 > 0)
1500 && (random() % 100 < rx_intentionallyDroppedOnReadPer100)) {
1501 rxi_DecodePacketHeader(p);
1502
1503 *host = from.sin_addr.s_addr;
1504 *port = from.sin_port;
1505
1506 dpf(("Dropped %d %s: %x.%u.%u.%u.%u.%u.%u flags %d len %d\n",
1507 p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(*host), ntohs(*port), p->header.serial,
1508 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags,
1509 p->length));
1510 #ifdef RX_TRIMDATABUFS
1511 rxi_TrimDataBufs(p, 1);
1512 #endif
1513 return 0;
1514 }
1515 #endif
1516 else {
1517 /* Extract packet header. */
1518 rxi_DecodePacketHeader(p);
1519
1520 *host = from.sin_addr.s_addr;
1521 *port = from.sin_port;
1522 if (rx_stats_active
1523 && p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) {
1524
1525 rx_atomic_inc(&rx_stats.packetsRead[p->header.type - 1]);
1526 }
1527
1528 #ifdef RX_TRIMDATABUFS
1529 /* Free any empty packet buffers at the end of this packet */
1530 rxi_TrimDataBufs(p, 1);
1531 #endif
1532 return 1;
1533 }
1534 }
1535
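/*
 * A minimal sketch (compiled out) of one receive step using the reader above:
 * allocate a receive-class packet, try to pull a datagram off the socket, and
 * recycle the buffer.  In the real listener a successfully read packet is
 * handed to the dispatch code, which may keep it; the unconditional free and
 * the helper name here are illustrative only.
 */
#if 0
static void
example_ReceiveOne(osi_socket sock)
{
    struct rx_packet *p = rxi_AllocPacket(RX_PACKET_CLASS_RECEIVE);
    afs_uint32 host;
    u_short port;

    if (p == NULL)
	return;
    if (rxi_ReadPacket(sock, p, &host, &port)) {
	/* header decoded, host/port filled in, p->length set to data size */
    }
    rxi_FreePacket(p);
}
#endif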
1536 #endif /* !KERNEL || UKERNEL */
1537
1538 /* This function splits off the first packet in a jumbo packet.
1539 * As of AFS 3.5, jumbograms contain more than one fixed size
1540 * packet, and the RX_JUMBO_PACKET flag is set in all but the
1541 * last packet header. All packets (except the last) are padded to
1542 * fall on RX_CBUFFERSIZE boundaries.
1543 * HACK: We store the length of the first n-1 packets in the
1544 * last two pad bytes. */
1545
1546 struct rx_packet *
1547 rxi_SplitJumboPacket(struct rx_packet *p, afs_uint32 host, short port,
1548 int first)
1549 {
1550 struct rx_packet *np;
1551 struct rx_jumboHeader *jp;
1552 int niov, i;
1553 struct iovec *iov;
1554 int length;
1555 afs_uint32 temp;
1556
1557 /* All but the last packet in each jumbogram are RX_JUMBOBUFFERSIZE
1558 * bytes in length. All but the first packet are preceded by
1559 * an abbreviated four byte header. The length of the last packet
1560 * is calculated from the size of the jumbogram. */
1561 length = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
1562
1563 if ((int)p->length < length) {
1564 dpf(("rxi_SplitJumboPacket: bogus length %d\n", p->length));
1565 return NULL;
1566 }
1567 niov = p->niovecs - 2;
1568 if (niov < 1) {
1569 dpf(("rxi_SplitJumboPacket: bogus niovecs %d\n", p->niovecs));
1570 return NULL;
1571 }
1572 iov = &p->wirevec[2];
1573 np = RX_CBUF_TO_PACKET(iov->iov_base, p);
1574
1575 /* Get a pointer to the abbreviated packet header */
1576 jp = (struct rx_jumboHeader *)
1577 ((char *)(p->wirevec[1].iov_base) + RX_JUMBOBUFFERSIZE);
1578
1579 /* Set up the iovecs for the next packet */
1580 np->wirevec[0].iov_base = (char *)(&np->wirehead[0]);
1581 np->wirevec[0].iov_len = sizeof(struct rx_header);
1582 np->wirevec[1].iov_base = (char *)(&np->localdata[0]);
1583 np->wirevec[1].iov_len = length - RX_JUMBOHEADERSIZE;
1584 np->niovecs = niov + 1;
1585 for (i = 2, iov++; i <= niov; i++, iov++) {
1586 np->wirevec[i] = *iov;
1587 }
1588 np->length = p->length - length;
1589 p->length = RX_JUMBOBUFFERSIZE;
1590 p->niovecs = 2;
1591
1592 /* Convert the jumbo packet header to host byte order */
1593 temp = ntohl(*(afs_uint32 *) jp);
1594 jp->flags = (u_char) (temp >> 24);
1595 jp->cksum = (u_short) (temp);
1596
1597 /* Fill in the packet header */
1598 np->header = p->header;
1599 np->header.serial = p->header.serial + 1;
1600 np->header.seq = p->header.seq + 1;
1601 np->header.userStatus = 0;
1602 np->header.flags = jp->flags;
1603 np->header.spare = jp->cksum;
1604
1605 return np;
1606 }
1607
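/*
 * A minimal sketch (compiled out) of consuming a jumbogram with the splitter
 * above: keep peeling the fixed-size head packet off while the current header
 * carries RX_JUMBO_PACKET; the remainder returned by each split becomes the
 * next packet to examine, and the final fragment (flag clear) is handled
 * after the loop.  The helper name is hypothetical; the real consumer lives
 * in the receive path in rx.c.
 */
#if 0
static void
example_WalkJumbogram(struct rx_packet *p, afs_uint32 host, short port)
{
    struct rx_packet *np;

    while (p->header.flags & RX_JUMBO_PACKET) {
	np = rxi_SplitJumboPacket(p, host, port, 0);
	if (np == NULL)
	    break;		/* malformed jumbogram */
	/* ... process the fixed-size packet p ... */
	p = np;
    }
    /* ... process the last (or only) fragment p ... */
}
#endif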
1608 #ifndef KERNEL
1609 /* Send a udp datagram */
1610 int
1611 osi_NetSend(osi_socket socket, void *addr, struct iovec *dvec, int nvecs,
1612 int length, int istack)
1613 {
1614 struct msghdr msg;
1615 int ret;
1616
1617 memset(&msg, 0, sizeof(msg));
1618 msg.msg_iov = dvec;
1619 msg.msg_iovlen = nvecs;
1620 msg.msg_name = addr;
1621 msg.msg_namelen = sizeof(struct sockaddr_in);
1622
1623 ret = rxi_Sendmsg(socket, &msg, 0);
1624
1625 return ret;
1626 }
1627 #elif !defined(UKERNEL)
1628 /*
1629 * message receipt is done in rxk_input or rx_put.
1630 */
1631
1632 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1633 /*
1634 * Copy an mblock to the contiguous area pointed to by cp.
1635 * MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1636 * but it doesn't really.
1637 * Returns the number of bytes not transferred.
1638 * The message is NOT changed.
1639 */
1640 static int
1641 cpytoc(mblk_t * mp, int off, int len, char *cp)
1642 {
1643 int n;
1644
1645 for (; mp && len > 0; mp = mp->b_cont) {
1646 if (mp->b_datap->db_type != M_DATA) {
1647 return -1;
1648 }
1649 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1650 memcpy(cp, (char *)mp->b_rptr, n);
1651 cp += n;
1652 len -= n;
1653 mp->b_rptr += n;
1654 }
1655 return (len);
1656 }
1657
1658 /* MTUXXX Supposed to skip <off> bytes and copy <len> bytes,
1659 * but it doesn't really.
1660 * This sucks, anyway, do it like m_cpy.... below
1661 */
1662 static int
1663 cpytoiovec(mblk_t * mp, int off, int len, struct iovec *iovs,
1664 int niovs)
1665 {
1666 int m, n, o, t, i;
1667
1668 for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) {
1669 if (mp->b_datap->db_type != M_DATA) {
1670 return -1;
1671 }
1672 n = MIN(len, (mp->b_wptr - mp->b_rptr));
1673 len -= n;
1674 while (n) {
1675 if (!t) {
1676 o = 0;
1677 i++;
1678 t = iovs[i].iov_len;
1679 }
1680 m = MIN(n, t);
1681 memcpy(iovs[i].iov_base + o, (char *)mp->b_rptr, m);
1682 mp->b_rptr += m;
1683 o += m;
1684 t -= m;
1685 n -= m;
1686 }
1687 }
1688 return (len);
1689 }
1690
1691 #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d)
1692 #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e)
1693 #else
1694 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1695 static int
1696 m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs)
1697 {
1698 caddr_t p1, p2;
1699 unsigned int l1, l2, i, t;
1700
1701 if (m == NULL || off < 0 || len < 0 || iovs == NULL)
1702 osi_Panic("m_cpytoiovec"); /* MTUXXX probably don't need this check */
1703
1704 while (off && m)
1705 if (m->m_len <= off) {
1706 off -= m->m_len;
1707 m = m->m_next;
1708 continue;
1709 } else
1710 break;
1711
1712 if (m == NULL)
1713 return len;
1714
1715 p1 = mtod(m, caddr_t) + off;
1716 l1 = m->m_len - off;
1717 i = 0;
1718 p2 = iovs[0].iov_base;
1719 l2 = iovs[0].iov_len;
1720
1721 while (len) {
1722 t = MIN(l1, MIN(l2, (unsigned int)len));
1723 memcpy(p2, p1, t);
1724 p1 += t;
1725 p2 += t;
1726 l1 -= t;
1727 l2 -= t;
1728 len -= t;
1729 if (!l1) {
1730 m = m->m_next;
1731 if (!m)
1732 break;
1733 p1 = mtod(m, caddr_t);
1734 l1 = m->m_len;
1735 }
1736 if (!l2) {
1737 if (++i >= niovs)
1738 break;
1739 p2 = iovs[i].iov_base;
1740 l2 = iovs[i].iov_len;
1741 }
1742
1743 }
1744
1745 return len;
1746 }
1747 #endif /* LINUX */
1748 #endif /* AFS_SUN5_ENV */
1749
1750 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN80_ENV)
1751 #if defined(AFS_NBSD_ENV)
1752 int
1753 rx_mb_to_packet(struct mbuf *amb, void (*free) (struct mbuf *), int hdr_len, int data_len, struct rx_packet *phandle)
1754 #else
1755 int
1756 rx_mb_to_packet(amb, free, hdr_len, data_len, phandle)
1757 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX110_ENV)
1758 mblk_t *amb;
1759 #else
1760 struct mbuf *amb;
1761 #endif
1762 void (*free) ();
1763 struct rx_packet *phandle;
1764 int hdr_len, data_len;
1765 #endif /* AFS_NBSD_ENV */
1766 {
1767 int code;
1768
1769 code =
1770 m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec,
1771 phandle->niovecs);
1772 (*free) (amb);
1773
1774 return code;
1775 }
1776 #endif /* LINUX */
1777 #endif /*KERNEL && !UKERNEL */
1778
1779
1780 /* send a response to a debug packet */
1781
1782 struct rx_packet *
1783 rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket,
1784 afs_uint32 ahost, short aport, int istack)
1785 {
1786 struct rx_debugIn tin;
1787 afs_int32 tl;
1788
1789 /*
1790 * Only respond to client-initiated Rx debug packets,
1791 * and clear the client flag in the response.
1792 */
1793 if (ap->header.flags & RX_CLIENT_INITIATED) {
1794 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
1795 rxi_EncodePacketHeader(ap);
1796 } else {
1797 return ap;
1798 }
1799
1800 rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
1801 /* all done with packet, now set length to the truth, so we can
1802 * reuse this packet */
1803 rx_computelen(ap, ap->length);
1804
1805 tin.type = ntohl(tin.type);
1806 tin.index = ntohl(tin.index);
1807 switch (tin.type) {
1808 case RX_DEBUGI_GETSTATS:{
1809 struct rx_debugStats tstat;
1810
1811 /* get basic stats */
1812 memset(&tstat, 0, sizeof(tstat)); /* make sure spares are zero */
1813 tstat.version = RX_DEBUGI_VERSION;
1814 #ifndef RX_ENABLE_LOCKS
1815 tstat.waitingForPackets = rx_waitingForPackets;
1816 #endif
1817 MUTEX_ENTER(&rx_serverPool_lock);
1818 tstat.nFreePackets = htonl(rx_nFreePackets);
1819 tstat.nPackets = htonl(rx_nPackets);
1820 tstat.callsExecuted = htonl(rxi_nCalls);
1821 tstat.packetReclaims = htonl(rx_packetReclaims);
1822 tstat.usedFDs = CountFDs(64);
1823 tstat.nWaiting = htonl(rx_atomic_read(&rx_nWaiting));
1824 tstat.nWaited = htonl(rx_atomic_read(&rx_nWaited));
1825 tstat.idleThreads = opr_queue_Count(&rx_idleServerQueue);
1826 MUTEX_EXIT(&rx_serverPool_lock);
1827 tstat.idleThreads = htonl(tstat.idleThreads);
1828 tl = sizeof(struct rx_debugStats) - ap->length;
1829 if (tl > 0)
1830 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1831
1832 if (tl <= 0) {
1833 rx_packetwrite(ap, 0, sizeof(struct rx_debugStats),
1834 (char *)&tstat);
1835 ap->length = sizeof(struct rx_debugStats);
1836 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1837 rx_computelen(ap, ap->length);
1838 }
1839 break;
1840 }
1841
1842 case RX_DEBUGI_GETALLCONN:
1843 case RX_DEBUGI_GETCONN:{
1844 unsigned int i, j;
1845 struct rx_connection *tc;
1846 struct rx_call *tcall;
1847 struct rx_debugConn tconn;
1848 int all = (tin.type == RX_DEBUGI_GETALLCONN);
1849
1850
1851 tl = sizeof(struct rx_debugConn) - ap->length;
1852 if (tl > 0)
1853 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1854 if (tl > 0)
1855 return ap;
1856
1857 memset(&tconn, 0, sizeof(tconn)); /* make sure spares are zero */
1858 /* get N'th (maybe) "interesting" connection info */
1859 for (i = 0; i < rx_hashTableSize; i++) {
1860 #if !defined(KERNEL)
1861 /* the time complexity of the algorithm used here
1862              * exponentially increases with the number of connections.
1863 */
1864 #ifdef AFS_PTHREAD_ENV
1865 pthread_yield();
1866 #else
1867 (void)IOMGR_Poll();
1868 #endif
1869 #endif
1870 MUTEX_ENTER(&rx_connHashTable_lock);
1871 /* We might be slightly out of step since we are not
1872 * locking each call, but this is only debugging output.
1873 */
1874 for (tc = rx_connHashTable[i]; tc; tc = tc->next) {
1875 if ((all || rxi_IsConnInteresting(tc))
1876 && tin.index-- <= 0) {
1877 tconn.host = tc->peer->host;
1878 tconn.port = tc->peer->port;
1879 tconn.cid = htonl(tc->cid);
1880 tconn.epoch = htonl(tc->epoch);
1881 tconn.serial = htonl(tc->serial);
1882 for (j = 0; j < RX_MAXCALLS; j++) {
1883 tconn.callNumber[j] = htonl(tc->callNumber[j]);
1884 if ((tcall = tc->call[j])) {
1885 tconn.callState[j] = tcall->state;
1886 tconn.callMode[j] = tcall->app.mode;
1887 tconn.callFlags[j] = tcall->flags;
1888 if (!opr_queue_IsEmpty(&tcall->rq))
1889 tconn.callOther[j] |= RX_OTHER_IN;
1890 if (!opr_queue_IsEmpty(&tcall->tq))
1891 tconn.callOther[j] |= RX_OTHER_OUT;
1892 } else
1893 tconn.callState[j] = RX_STATE_NOTINIT;
1894 }
1895
1896 tconn.natMTU = htonl(tc->peer->natMTU);
1897 tconn.error = htonl(tc->error);
1898 tconn.flags = tc->flags;
1899 tconn.type = tc->type;
1900 tconn.securityIndex = tc->securityIndex;
1901 if (tc->securityObject) {
1902 RXS_GetStats(tc->securityObject, tc,
1903 &tconn.secStats);
1904 #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a))
1905 #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a))
1906 DOHTONL(flags);
1907 DOHTONL(expires);
1908 DOHTONL(packetsReceived);
1909 DOHTONL(packetsSent);
1910 DOHTONL(bytesReceived);
1911 DOHTONL(bytesSent);
1912 for (i = 0;
1913 i <
1914 sizeof(tconn.secStats.spares) /
1915 sizeof(short); i++)
1916 DOHTONS(spares[i]);
1917 for (i = 0;
1918 i <
1919 sizeof(tconn.secStats.sparel) /
1920 sizeof(afs_int32); i++)
1921 DOHTONL(sparel[i]);
1922 }
1923
1924 MUTEX_EXIT(&rx_connHashTable_lock);
1925 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1926 (char *)&tconn);
1927 tl = ap->length;
1928 ap->length = sizeof(struct rx_debugConn);
1929 rxi_SendDebugPacket(ap, asocket, ahost, aport,
1930 istack);
1931 ap->length = tl;
1932 return ap;
1933 }
1934 }
1935 MUTEX_EXIT(&rx_connHashTable_lock);
1936 }
1937            /* if we make it here, there are no more interesting connections */
1938 tconn.cid = htonl(0xffffffff); /* means end */
1939 rx_packetwrite(ap, 0, sizeof(struct rx_debugConn),
1940 (char *)&tconn);
1941 tl = ap->length;
1942 ap->length = sizeof(struct rx_debugConn);
1943 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
1944 ap->length = tl;
1945 break;
1946 }
1947
1948 /*
1949 * Pass back all the peer structures we have available
1950 */
1951
1952 case RX_DEBUGI_GETPEER:{
1953 unsigned int i;
1954 struct rx_peer *tp;
1955 struct rx_debugPeer tpeer;
1956
1957
1958 tl = sizeof(struct rx_debugPeer) - ap->length;
1959 if (tl > 0)
1960 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
1961 if (tl > 0)
1962 return ap;
1963
1964 memset(&tpeer, 0, sizeof(tpeer));
1965 for (i = 0; i < rx_hashTableSize; i++) {
1966 #if !defined(KERNEL)
1967 /* the time complexity of the algorithm used here
1968              * exponentially increases with the number of peers.
1969              *
1970              * Yielding after processing each hash table entry
1971              * and dropping rx_peerHashTable_lock also
1972              * increases the risk that we will miss a new
1973 * entry - but we are willing to live with this
1974 * limitation since this is meant for debugging only
1975 */
1976 #ifdef AFS_PTHREAD_ENV
1977 pthread_yield();
1978 #else
1979 (void)IOMGR_Poll();
1980 #endif
1981 #endif
1982 MUTEX_ENTER(&rx_peerHashTable_lock);
1983 for (tp = rx_peerHashTable[i]; tp; tp = tp->next) {
1984 if (tin.index-- <= 0) {
1985 tp->refCount++;
1986 MUTEX_EXIT(&rx_peerHashTable_lock);
1987
1988 MUTEX_ENTER(&tp->peer_lock);
1989 tpeer.host = tp->host;
1990 tpeer.port = tp->port;
1991 tpeer.ifMTU = htons(tp->ifMTU);
1992 tpeer.idleWhen = htonl(tp->idleWhen);
1993 tpeer.refCount = htons(tp->refCount);
1994 tpeer.burstSize = 0;
1995 tpeer.burst = 0;
1996 tpeer.burstWait.sec = 0;
1997 tpeer.burstWait.usec = 0;
1998 tpeer.rtt = htonl(tp->rtt);
1999 tpeer.rtt_dev = htonl(tp->rtt_dev);
2000 tpeer.nSent = htonl(tp->nSent);
2001 tpeer.reSends = htonl(tp->reSends);
2002 tpeer.natMTU = htons(tp->natMTU);
2003 tpeer.maxMTU = htons(tp->maxMTU);
2004 tpeer.maxDgramPackets = htons(tp->maxDgramPackets);
2005 tpeer.ifDgramPackets = htons(tp->ifDgramPackets);
2006 tpeer.MTU = htons(tp->MTU);
2007 tpeer.cwind = htons(tp->cwind);
2008 tpeer.nDgramPackets = htons(tp->nDgramPackets);
2009 tpeer.congestSeq = htons(tp->congestSeq);
2010 tpeer.bytesSent.high =
2011 htonl(tp->bytesSent >> 32);
2012 tpeer.bytesSent.low =
2013 htonl(tp->bytesSent & MAX_AFS_UINT32);
2014 tpeer.bytesReceived.high =
2015 htonl(tp->bytesReceived >> 32);
2016 tpeer.bytesReceived.low =
2017 htonl(tp->bytesReceived & MAX_AFS_UINT32);
2018 MUTEX_EXIT(&tp->peer_lock);
2019
2020 MUTEX_ENTER(&rx_peerHashTable_lock);
2021 tp->refCount--;
2022 MUTEX_EXIT(&rx_peerHashTable_lock);
2023
2024 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
2025 (char *)&tpeer);
2026 tl = ap->length;
2027 ap->length = sizeof(struct rx_debugPeer);
2028 rxi_SendDebugPacket(ap, asocket, ahost, aport,
2029 istack);
2030 ap->length = tl;
2031 return ap;
2032 }
2033 }
2034 MUTEX_EXIT(&rx_peerHashTable_lock);
2035 }
2036 /* if we make it here, there are no more peers to report */
2037 tpeer.host = htonl(0xffffffff); /* means end */
2038 rx_packetwrite(ap, 0, sizeof(struct rx_debugPeer),
2039 (char *)&tpeer);
2040 tl = ap->length;
2041 ap->length = sizeof(struct rx_debugPeer);
2042 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2043 ap->length = tl;
2044 break;
2045 }
2046
2047 case RX_DEBUGI_RXSTATS:{
2048 int i;
2049 afs_int32 *s;
2050
2051 tl = sizeof(rx_stats) - ap->length;
2052 if (tl > 0)
2053 tl = rxi_AllocDataBuf(ap, tl, RX_PACKET_CLASS_SEND_CBUF);
2054 if (tl > 0)
2055 return ap;
2056
2057 /* Since it's all int32s, convert to network order with a loop. */
2058 if (rx_stats_active)
2059 MUTEX_ENTER(&rx_stats_mutex);
2060 s = (afs_int32 *) & rx_stats;
2061 for (i = 0; i < sizeof(rx_stats) / sizeof(afs_int32); i++, s++)
2062 rx_PutInt32(ap, i * sizeof(afs_int32), htonl(*s));
2063
2064 tl = ap->length;
2065 ap->length = sizeof(rx_stats);
2066 if (rx_stats_active)
2067 MUTEX_EXIT(&rx_stats_mutex);
2068 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2069 ap->length = tl;
2070 break;
2071 }
2072
2073 default:
2074 /* error response packet */
2075 tin.type = htonl(RX_DEBUGI_BADTYPE);
2076 tin.index = tin.type;
2077 rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), (char *)&tin);
2078 tl = ap->length;
2079 ap->length = sizeof(struct rx_debugIn);
2080 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2081 ap->length = tl;
2082 break;
2083 }
2084 return ap;
2085 }
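
/*
 * The RX_DEBUGI_GETPEER case above splits each peer's 64-bit byte counters
 * into two 32-bit words in network byte order, because the on-the-wire
 * rx_debugPeer structure predates 64-bit fields.  The block below is a
 * minimal sketch of that split and of the reassembly a debug client would
 * perform; it is illustrative only (hence the #if 0) and the helper names
 * are invented for this example.
 */
#if 0
static void
example_split_counter(afs_uint64 value, afs_uint32 *hi, afs_uint32 *lo)
{
    /* High and low halves, each converted to network byte order. */
    *hi = htonl((afs_uint32)(value >> 32));
    *lo = htonl((afs_uint32)(value & MAX_AFS_UINT32));
}

static afs_uint64
example_join_counter(afs_uint32 hi, afs_uint32 lo)
{
    /* Reverse the split after byte-swapping each half back to host order. */
    return ((afs_uint64)ntohl(hi) << 32) | (afs_uint64)ntohl(lo);
}
#endif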
2086
2087 struct rx_packet *
2088 rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket,
2089 afs_uint32 ahost, short aport, int istack)
2090 {
2091 afs_int32 tl;
2092
2093 /*
2094 * Only respond to client-initiated version requests, and
2095 * clear that flag in the response.
2096 */
2097 if (ap->header.flags & RX_CLIENT_INITIATED) {
2098 char buf[66];
2099
2100 ap->header.flags = ap->header.flags & ~RX_CLIENT_INITIATED;
2101 rxi_EncodePacketHeader(ap);
2102 memset(buf, 0, sizeof(buf));
2103 strncpy(buf, cml_version_number + 4, sizeof(buf) - 1);
2104 rx_packetwrite(ap, 0, 65, buf);
2105 tl = ap->length;
2106 ap->length = 65;
2107 rxi_SendDebugPacket(ap, asocket, ahost, aport, istack);
2108 ap->length = tl;
2109 }
2110
2111 return ap;
2112 }
2113
2114
2115 /* send a debug packet back to the sender */
2116 static void
2117 rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket,
2118 afs_uint32 ahost, short aport, afs_int32 istack)
2119 {
2120 struct sockaddr_in taddr;
2121 unsigned int i, nbytes, savelen = 0;
2122 int saven = 0;
2123 #ifdef KERNEL
2124 int waslocked = ISAFS_GLOCK();
2125 #endif
2126
2127 taddr.sin_family = AF_INET;
2128 taddr.sin_port = aport;
2129 taddr.sin_addr.s_addr = ahost;
2130 memset(&taddr.sin_zero, 0, sizeof(taddr.sin_zero));
2131 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2132 taddr.sin_len = sizeof(struct sockaddr_in);
2133 #endif
2134
2135 /* We need to trim the niovecs. */
2136 nbytes = apacket->length;
2137 for (i = 1; i < apacket->niovecs; i++) {
2138 if (nbytes <= apacket->wirevec[i].iov_len) {
2139 savelen = apacket->wirevec[i].iov_len;
2140 saven = apacket->niovecs;
2141 apacket->wirevec[i].iov_len = nbytes;
2142 apacket->niovecs = i + 1; /* so condition fails because i == niovecs */
2143 } else
2144 nbytes -= apacket->wirevec[i].iov_len;
2145 }
2146 #ifdef KERNEL
2147 #ifdef RX_KERNEL_TRACE
2148 if (ICL_SETACTIVE(afs_iclSetp)) {
2149 if (!waslocked)
2150 AFS_GLOCK();
2151 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2152 "before osi_NetSend()");
2153 AFS_GUNLOCK();
2154 }
2155 #else
2156 if (waslocked)
2157 AFS_GUNLOCK();
2158 #endif
2159 #endif
2160 /* debug packets are not reliably delivered, hence the cast below. */
2161 (void)osi_NetSend(asocket, &taddr, apacket->wirevec, apacket->niovecs,
2162 apacket->length + RX_HEADER_SIZE, istack);
2163 #ifdef KERNEL
2164 #ifdef RX_KERNEL_TRACE
2165 if (ICL_SETACTIVE(afs_iclSetp)) {
2166 AFS_GLOCK();
2167 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2168 "after osi_NetSend()");
2169 if (!waslocked)
2170 AFS_GUNLOCK();
2171 }
2172 #else
2173 if (waslocked)
2174 AFS_GLOCK();
2175 #endif
2176 #endif
2177 if (saven) { /* means we truncated the packet above. */
2178 apacket->wirevec[i - 1].iov_len = savelen;
2179 apacket->niovecs = saven;
2180 }
2181
2182 }
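
/*
 * The loop above temporarily shrinks the last iovec that is actually needed
 * and lowers niovecs, so that osi_NetSend() is handed exactly
 * apacket->length bytes of payload; savelen/saven undo the change afterwards.
 * The sketch below restates that idea on a bare iovec array; it is
 * illustrative only (not compiled) and the function name is invented.
 */
#if 0
static int
example_trim_iovecs(struct iovec *vec, int niovecs, size_t nbytes)
{
    int i;

    /* vec[0] is the wire header; walk the payload buffers. */
    for (i = 1; i < niovecs; i++) {
        if (nbytes <= vec[i].iov_len) {
            vec[i].iov_len = nbytes;    /* shrink the final buffer in use */
            return i + 1;               /* vec[0..i] now cover the payload */
        }
        nbytes -= vec[i].iov_len;
    }
    return niovecs;                     /* the payload fills every buffer */
}
#endif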
2183
2184 static void
2185 rxi_NetSendError(struct rx_call *call, int code)
2186 {
2187 int down = 0;
2188 #ifdef AFS_NT40_ENV
2189 if (code == -1 && WSAGetLastError() == WSAEHOSTUNREACH) {
2190 down = 1;
2191 }
2192 if (code == -WSAEHOSTUNREACH) {
2193 down = 1;
2194 }
2195 #elif defined(AFS_LINUX20_ENV)
2196 if (code == -ENETUNREACH) {
2197 down = 1;
2198 }
2199 #elif defined(AFS_DARWIN_ENV)
2200 if (code == EHOSTUNREACH) {
2201 down = 1;
2202 }
2203 #endif
2204 if (down) {
2205 call->lastReceiveTime = 0;
2206 }
2207 }
2208
2209 /* Send the packet to the appropriate destination for the specified
2210 * call. The header is first encoded and placed in the packet.
2211 */
2212 void
2213 rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
2214 struct rx_packet *p, int istack)
2215 {
2216 #if defined(KERNEL)
2217 int waslocked;
2218 #endif
2219 int code;
2220 struct sockaddr_in addr;
2221 struct rx_peer *peer = conn->peer;
2222 osi_socket socket;
2223 #ifdef RXDEBUG
2224 char deliveryType = 'S';
2225 #endif
2226 /* The address we're sending the packet to */
2227 memset(&addr, 0, sizeof(addr));
2228 addr.sin_family = AF_INET;
2229 addr.sin_port = peer->port;
2230 addr.sin_addr.s_addr = peer->host;
2231 memset(&addr.sin_zero, 0, sizeof(addr.sin_zero));
2232
2233 /* This stuff should be revamped, I think, so that most, if not
2234 * all, of the header stuff is always added here. We could
2235 * probably do away with the encode/decode routines. XXXXX */
2236
2237 /* Stamp each packet with a unique serial number. The serial
2238 * number is maintained on a connection basis because some types
2239 * of security may be based on the serial number of the packet,
2240 * and security is handled on a per authenticated-connection
2241 * basis. */
2242 /* Pre-increment, to guarantee no zero serial number; a zero
2243 * serial number means the packet was never sent. */
2244 MUTEX_ENTER(&conn->conn_data_lock);
2245 p->header.serial = ++conn->serial;
2246 if (p->length > conn->peer->maxPacketSize) {
2247 if ((p->header.type == RX_PACKET_TYPE_ACK) &&
2248 (p->header.flags & RX_REQUEST_ACK)) {
2249 conn->lastPingSize = p->length;
2250 conn->lastPingSizeSer = p->header.serial;
2251 } else if (p->header.seq != 0) {
2252 conn->lastPacketSize = p->length;
2253 conn->lastPacketSizeSeq = p->header.seq;
2254 }
2255 }
2256 MUTEX_EXIT(&conn->conn_data_lock);
2257 /* This is so we can adjust retransmit time-outs better in the face of
2258 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2259 */
2260 if (p->firstSerial == 0) {
2261 p->firstSerial = p->header.serial;
2262 }
2263 #ifdef RXDEBUG
2264 /* If an output tracer function is defined, call it with the packet and
2265 * network address. Note this function may modify its arguments. */
2266 if (rx_almostSent) {
2267 int drop = (*rx_almostSent) (p, &addr);
2268 /* drop packet if return value is non-zero? */
2269 if (drop)
2270 deliveryType = 'D'; /* Drop the packet */
2271 }
2272 #endif
2273
2274 /* Get network byte order header */
2275 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2276 * touch ALL the fields */
2277
2278 /* Send the packet out on the same socket that related packets are being
2279 * received on */
2280 socket =
2281 (conn->type ==
2282 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2283
2284 #ifdef RXDEBUG
2285 /* Possibly drop this packet, for testing purposes */
2286 if ((deliveryType == 'D')
2287 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2288 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2289 deliveryType = 'D'; /* Drop the packet */
2290 } else {
2291 deliveryType = 'S'; /* Send the packet */
2292 #endif /* RXDEBUG */
2293
2294 /* Loop until the packet is sent. We'd prefer just to use a
2295 * blocking socket, but unfortunately the interface doesn't
2296 * allow us to have the socket block in send mode, and not
2297 * block in receive mode */
2298 #ifdef KERNEL
2299 waslocked = ISAFS_GLOCK();
2300 #ifdef RX_KERNEL_TRACE
2301 if (ICL_SETACTIVE(afs_iclSetp)) {
2302 if (!waslocked)
2303 AFS_GLOCK();
2304 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2305 "before osi_NetSend()");
2306 AFS_GUNLOCK();
2307 }
2308 #else
2309 if (waslocked)
2310 AFS_GUNLOCK();
2311 #endif
2312 #endif
2313 if ((code =
2314 osi_NetSend(socket, &addr, p->wirevec, p->niovecs,
2315 p->length + RX_HEADER_SIZE, istack)) != 0) {
2316 /* send failed, so let's hurry up the resend, eh? */
2317 if (rx_stats_active)
2318 rx_atomic_inc(&rx_stats.netSendFailures);
2319 p->flags &= ~RX_PKTFLAG_SENT; /* resend it very soon */
2320
2321 /* Some systems are nice and tell us right away that we cannot
2322 * reach this recipient by returning an error code.
2323 * So, when this happens let's "down" the host NOW so
2324 * we don't sit around waiting for this host to timeout later.
2325 */
2326 if (call) {
2327 rxi_NetSendError(call, code);
2328 }
2329 }
2330 #ifdef KERNEL
2331 #ifdef RX_KERNEL_TRACE
2332 if (ICL_SETACTIVE(afs_iclSetp)) {
2333 AFS_GLOCK();
2334 afs_Trace1(afs_iclSetp, CM_TRACE_TIMESTAMP, ICL_TYPE_STRING,
2335 "after osi_NetSend()");
2336 if (!waslocked)
2337 AFS_GUNLOCK();
2338 }
2339 #else
2340 if (waslocked)
2341 AFS_GLOCK();
2342 #endif
2343 #endif
2344 #ifdef RXDEBUG
2345 }
2346 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
2347 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2348 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2349 p->header.seq, p->header.flags, p, p->length));
2350 #endif
2351 if (rx_stats_active) {
2352 rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
2353 MUTEX_ENTER(&peer->peer_lock);
2354 peer->bytesSent += p->length;
2355 MUTEX_EXIT(&peer->peer_lock);
2356 }
2357 }
2358
2359 /* Send a list of packets to the appropriate destination for the specified
2360 * connection. The headers are first encoded and placed in the packets.
2361 */
2362 void
2363 rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
2364 struct rx_packet **list, int len, int istack)
2365 {
2366 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2367 int waslocked;
2368 #endif
2369 struct sockaddr_in addr;
2370 struct rx_peer *peer = conn->peer;
2371 osi_socket socket;
2372 struct rx_packet *p = NULL;
2373 struct iovec wirevec[RX_MAXIOVECS];
2374 int i, length, code;
2375 afs_uint32 serial;
2376 afs_uint32 temp;
2377 struct rx_jumboHeader *jp;
2378 #ifdef RXDEBUG
2379 char deliveryType = 'S';
2380 #endif
2381 /* The address we're sending the packet to */
2382 addr.sin_family = AF_INET;
2383 addr.sin_port = peer->port;
2384 addr.sin_addr.s_addr = peer->host;
2385 memset(&addr.sin_zero, 0, sizeof(addr.sin_zero));
2386
2387 if (len + 1 > RX_MAXIOVECS) {
2388 osi_Panic("rxi_SendPacketList, len > RX_MAXIOVECS\n");
2389 }
2390
2391 /*
2392 * Stamp the packets in this jumbogram with consecutive serial numbers
2393 */
2394 MUTEX_ENTER(&conn->conn_data_lock);
2395 serial = conn->serial;
2396 conn->serial += len;
2397 for (i = 0; i < len; i++) {
2398 p = list[i];
2399 /* a ping *or* a sequenced packet can count */
2400 if (p->length > conn->peer->maxPacketSize) {
2401 if (((p->header.type == RX_PACKET_TYPE_ACK) &&
2402 (p->header.flags & RX_REQUEST_ACK)) &&
2403 ((i == 0) || (p->length >= conn->lastPingSize))) {
2404 conn->lastPingSize = p->length;
2405 conn->lastPingSizeSer = serial + i;
2406 } else if ((p->header.seq != 0) &&
2407 ((i == 0) || (p->length >= conn->lastPacketSize))) {
2408 conn->lastPacketSize = p->length;
2409 conn->lastPacketSizeSeq = p->header.seq;
2410 }
2411 }
2412 }
2413 MUTEX_EXIT(&conn->conn_data_lock);
2414
2415
2416 /* This stuff should be revamped, I think, so that most, if not
2417 * all, of the header stuff is always added here. We could
2418 * probably do away with the encode/decode routines. XXXXX */
2419
2420 jp = NULL;
2421 length = RX_HEADER_SIZE;
2422 wirevec[0].iov_base = (char *)(&list[0]->wirehead[0]);
2423 wirevec[0].iov_len = RX_HEADER_SIZE;
2424 for (i = 0; i < len; i++) {
2425 p = list[i];
2426
2427 /* The whole 3.5 jumbogram scheme relies on packets fitting
2428 * in a single packet buffer. */
2429 if (p->niovecs > 2) {
2430 osi_Panic("rxi_SendPacketList, niovecs > 2\n");
2431 }
2432
2433 /* Set the RX_JUMBO_PACKET flag in all but the last packet
2434 * in this chunk. */
2435 if (i < len - 1) {
2436 if (p->length != RX_JUMBOBUFFERSIZE) {
2437 osi_Panic("rxi_SendPacketList, length != jumbo size\n");
2438 }
2439 p->header.flags |= RX_JUMBO_PACKET;
2440 length += RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2441 wirevec[i + 1].iov_len = RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2442 } else {
2443 wirevec[i + 1].iov_len = p->length;
2444 length += p->length;
2445 }
2446 wirevec[i + 1].iov_base = (char *)(&p->localdata[0]);
2447 if (jp != NULL) {
2448 /* Convert jumbo packet header to network byte order */
2449 temp = (afs_uint32) (p->header.flags) << 24;
2450 temp |= (afs_uint32) (p->header.spare);
2451 *(afs_uint32 *) jp = htonl(temp);
2452 }
2453 jp = (struct rx_jumboHeader *)
2454 ((char *)(&p->localdata[0]) + RX_JUMBOBUFFERSIZE);
2455
2456 /* Stamp each packet with a unique serial number. The serial
2457 * number is maintained on a connection basis because some types
2458 * of security may be based on the serial number of the packet,
2459 * and security is handled on a per authenticated-connection
2460 * basis. */
2461 /* Pre-increment, to guarantee no zero serial number; a zero
2462 * serial number means the packet was never sent. */
2463 p->header.serial = ++serial;
2464 /* This is so we can adjust retransmit time-outs better in the face of
2465 * rapidly changing round-trip times. RTO estimation is not a la Karn.
2466 */
2467 if (p->firstSerial == 0) {
2468 p->firstSerial = p->header.serial;
2469 }
2470 #ifdef RXDEBUG
2471 /* If an output tracer function is defined, call it with the packet and
2472 * network address. Note this function may modify its arguments. */
2473 if (rx_almostSent) {
2474 int drop = (*rx_almostSent) (p, &addr);
2475 /* drop packet if return value is non-zero? */
2476 if (drop)
2477 deliveryType = 'D'; /* Drop the packet */
2478 }
2479 #endif
2480
2481 /* Get network byte order header */
2482 rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, don't need to
2483 * touch ALL the fields */
2484 }
2485
2486 /* Send the packet out on the same socket that related packets are being
2487 * received on */
2488 socket =
2489 (conn->type ==
2490 RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
2491
2492 #ifdef RXDEBUG
2493 /* Possibly drop this packet, for testing purposes */
2494 if ((deliveryType == 'D')
2495 || ((rx_intentionallyDroppedPacketsPer100 > 0)
2496 && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) {
2497 deliveryType = 'D'; /* Drop the packet */
2498 } else {
2499 deliveryType = 'S'; /* Send the packet */
2500 #endif /* RXDEBUG */
2501
2502 /* Loop until the packet is sent. We'd prefer just to use a
2503 * blocking socket, but unfortunately the interface doesn't
2504 * allow us to have the socket block in send mode, and not
2505 * block in receive mode */
2506 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2507 waslocked = ISAFS_GLOCK();
2508 if (!istack && waslocked)
2509 AFS_GUNLOCK();
2510 #endif
2511 if ((code =
2512 osi_NetSend(socket, &addr, &wirevec[0], len + 1, length,
2513 istack)) != 0) {
2514 /* send failed, so let's hurry up the resend, eh? */
2515 if (rx_stats_active)
2516 rx_atomic_inc(&rx_stats.netSendFailures);
2517 for (i = 0; i < len; i++) {
2518 p = list[i];
2519 p->flags &= ~RX_PKTFLAG_SENT; /* resend it very soon */
2520 }
2521 /* Some systems are nice and tell us right away that we cannot
2522 * reach this recipient by returning an error code.
2523 * So, when this happens let's "down" the host NOW so
2524 * we don't sit around waiting for this host to timeout later.
2525 */
2526 if (call) {
2527 rxi_NetSendError(call, code);
2528 }
2529 }
2530 #if defined(AFS_SUN5_ENV) && defined(KERNEL)
2531 if (!istack && waslocked)
2532 AFS_GLOCK();
2533 #endif
2534 #ifdef RXDEBUG
2535 }
2536
2537 osi_Assert(p != NULL);
2538
2539 dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %"AFS_PTR_FMT" len %d\n",
2540 deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], ntohl(peer->host),
2541 ntohs(peer->port), p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber,
2542 p->header.seq, p->header.flags, p, p->length));
2543
2544 #endif
2545 if (rx_stats_active) {
2546 rx_atomic_inc(&rx_stats.packetsSent[p->header.type - 1]);
2547 MUTEX_ENTER(&peer->peer_lock);
2548 peer->bytesSent += p->length;
2549 MUTEX_EXIT(&peer->peer_lock);
2550 }
2551 }
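
/*
 * For reference, the jumbogram assembled above has the following on-the-wire
 * layout (reconstructed from the wirevec assembly in rxi_SendPacketList();
 * the sizes are the RX_* constants used there):
 *
 *   [ RX header ][ packet 0 data | jumbo hdr ][ packet 1 data | jumbo hdr ]
 *       ... [ last packet data ]
 *
 * Every packet except the last contributes RX_JUMBOBUFFERSIZE bytes of data
 * followed by an RX_JUMBOHEADERSIZE jumbo header; the last packet contributes
 * exactly p->length bytes.  Each jumbo header is a single 32-bit word,
 * (flags << 24) | spare of the packet that follows it, in network byte
 * order, and every packet except the last carries the RX_JUMBO_PACKET flag.
 */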
2552
2553 /* Send a raw abort packet, without any call or connection structures */
2554 void
2555 rxi_SendRawAbort(osi_socket socket, afs_uint32 host, u_short port,
2556 afs_uint32 serial, afs_int32 error,
2557 struct rx_packet *source, int istack)
2558 {
2559 struct rx_header theader;
2560 struct sockaddr_in addr;
2561 struct iovec iov[2];
2562
2563 memset(&theader, 0, sizeof(theader));
2564 theader.epoch = htonl(source->header.epoch);
2565 theader.callNumber = htonl(source->header.callNumber);
2566 theader.serial = htonl(serial);
2567 theader.type = RX_PACKET_TYPE_ABORT;
2568 theader.serviceId = htons(source->header.serviceId);
2569 theader.securityIndex = source->header.securityIndex;
2570 theader.cid = htonl(source->header.cid);
2571
2572 /*
2573 * If the abort is being sent in response to a server initiated packet,
2574 * set client_initiated in the abort to ensure it is not associated by
2575 * the receiver with a connection in the opposite direction.
2576 */
2577 if ((source->header.flags & RX_CLIENT_INITIATED) != RX_CLIENT_INITIATED)
2578 theader.flags |= RX_CLIENT_INITIATED;
2579
2580 error = htonl(error);
2581
2582 iov[0].iov_base = &theader;
2583 iov[0].iov_len = sizeof(struct rx_header);
2584 iov[1].iov_base = &error;
2585 iov[1].iov_len = sizeof(error);
2586
2587 addr.sin_family = AF_INET;
2588 addr.sin_addr.s_addr = host;
2589 addr.sin_port = port;
2590 memset(&addr.sin_zero, 0, sizeof(addr.sin_zero));
2591 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2592 addr.sin_len = sizeof(struct sockaddr_in);
2593 #endif
2594
2595 osi_NetSend(socket, &addr, iov, 2,
2596 sizeof(struct rx_header) + sizeof(error), istack);
2597 }
2598
2599 /* Send a "special" packet to the peer connection. If call is
2600 * specified, then the packet is directed to a specific call channel
2601 * associated with the connection, otherwise it is directed to the
2602 * connection only. Uses optionalPacket if it is supplied, rather than
2603 * allocating a new packet buffer. Nbytes is the length of the data
2604 * portion of the packet. If data is non-null, nbytes of data are
2605 * copied into the packet. Type is the type of the packet, as defined
2606 * in rx.h. Bug: there's a lot of duplication between this and other
2607 * routines. This needs to be cleaned up. */
2608 struct rx_packet *
2609 rxi_SendSpecial(struct rx_call *call,
2610 struct rx_connection *conn,
2611 struct rx_packet *optionalPacket, int type, char *data,
2612 int nbytes, int istack)
2613 {
2614 /* Some of the following stuff should be common code for all
2615 * packet sends (it's repeated elsewhere) */
2616 struct rx_packet *p;
2617 unsigned int i = 0;
2618 int savelen = 0, saven = 0;
2619 int channel, callNumber;
2620 if (call) {
2621 channel = call->channel;
2622 callNumber = *call->callNumber;
2623 /* BUSY packets refer to the next call on this connection */
2624 if (type == RX_PACKET_TYPE_BUSY) {
2625 callNumber++;
2626 }
2627 } else {
2628 channel = 0;
2629 callNumber = 0;
2630 }
2631 p = optionalPacket;
2632 if (!p) {
2633 p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
2634 if (!p)
2635 osi_Panic("rxi_SendSpecial failure");
2636 }
2637
2638 if (nbytes != -1)
2639 p->length = nbytes;
2640 else
2641 nbytes = p->length;
2642 p->header.serviceId = conn->serviceId;
2643 p->header.securityIndex = conn->securityIndex;
2644 p->header.cid = (conn->cid | channel);
2645 p->header.callNumber = callNumber;
2646 p->header.seq = 0;
2647 p->header.epoch = conn->epoch;
2648 p->header.type = type;
2649 p->header.userStatus = 0;
2650 p->header.flags = 0;
2651 if (conn->type == RX_CLIENT_CONNECTION)
2652 p->header.flags |= RX_CLIENT_INITIATED;
2653 if (data)
2654 rx_packetwrite(p, 0, nbytes, data);
2655
2656 for (i = 1; i < p->niovecs; i++) {
2657 if (nbytes <= p->wirevec[i].iov_len) {
2658 savelen = p->wirevec[i].iov_len;
2659 saven = p->niovecs;
2660 p->wirevec[i].iov_len = nbytes;
2661 p->niovecs = i + 1; /* so condition fails because i == niovecs */
2662 } else
2663 nbytes -= p->wirevec[i].iov_len;
2664 }
2665
2666 if (call)
2667 rxi_Send(call, p, istack);
2668 else
2669 rxi_SendPacket((struct rx_call *)0, conn, p, istack);
2670 if (saven) { /* means we truncated the packet above. We probably don't */
2671 /* really need to do this, but it seems safer this way, given that */
2672 /* sneaky optionalPacket... */
2673 p->wirevec[i - 1].iov_len = savelen;
2674 p->niovecs = saven;
2675 }
2676 if (!optionalPacket)
2677 rxi_FreePacket(p);
2678 return optionalPacket;
2679 }
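
/*
 * A hedged usage sketch: a connection-level abort could be sent through
 * rxi_SendSpecial() roughly as below, with the error code in network byte
 * order as the data portion and no optionalPacket, so the routine allocates
 * and frees the packet itself.  The snippet is illustrative only (not
 * compiled) and the wrapper name is invented.
 */
#if 0
static void
example_send_conn_abort(struct rx_connection *conn, afs_int32 error, int istack)
{
    afs_int32 nerror = htonl(error);

    (void)rxi_SendSpecial((struct rx_call *)0, conn, NULL,
                          RX_PACKET_TYPE_ABORT, (char *)&nerror,
                          (int)sizeof(nerror), istack);
}
#endif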
2680
2681
2682 /* Encode the packet's header (from the struct header in the packet to
2683 * the net byte order representation in the wire representation of the
2684 * packet, which is what is actually sent out on the wire) */
2685 void
2686 rxi_EncodePacketHeader(struct rx_packet *p)
2687 {
2688 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2689
2690 memset(buf, 0, RX_HEADER_SIZE);
2691 *buf++ = htonl(p->header.epoch);
2692 *buf++ = htonl(p->header.cid);
2693 *buf++ = htonl(p->header.callNumber);
2694 *buf++ = htonl(p->header.seq);
2695 *buf++ = htonl(p->header.serial);
2696 *buf++ = htonl((((afs_uint32) p->header.type) << 24)
2697 | (((afs_uint32) p->header.flags) << 16)
2698 | (p->header.userStatus << 8) | p->header.securityIndex);
2699 /* Note: top 16 bits of this next word were reserved */
2700 *buf++ = htonl((p->header.spare << 16) | (p->header.serviceId & 0xffff));
2701 }
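
/*
 * For reference, the wire header written above is seven 32-bit words, all in
 * network byte order:
 *
 *   word 0: epoch
 *   word 1: cid          (connection id with the call channel in the low bits)
 *   word 2: callNumber
 *   word 3: seq
 *   word 4: serial
 *   word 5: type << 24 | flags << 16 | userStatus << 8 | securityIndex
 *   word 6: spare << 16 | serviceId
 *
 * On received packets the top half of word 6 carries the security checksum
 * (see the note in rxi_DecodePacketHeader() below).
 */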
2702
2703 /* Decode the packet's header (from net byte order to a struct header) */
2704 void
2705 rxi_DecodePacketHeader(struct rx_packet *p)
2706 {
2707 afs_uint32 *buf = (afs_uint32 *) (p->wirevec[0].iov_base); /* MTUXXX */
2708 afs_uint32 temp;
2709
2710 p->header.epoch = ntohl(*buf);
2711 buf++;
2712 p->header.cid = ntohl(*buf);
2713 buf++;
2714 p->header.callNumber = ntohl(*buf);
2715 buf++;
2716 p->header.seq = ntohl(*buf);
2717 buf++;
2718 p->header.serial = ntohl(*buf);
2719 buf++;
2720
2721 temp = ntohl(*buf);
2722 buf++;
2723
2724 /* C will truncate these values to the byte-sized fields for me */
2725 p->header.type = temp >> 24;
2726 p->header.flags = temp >> 16;
2727 p->header.userStatus = temp >> 8;
2728 p->header.securityIndex = temp >> 0;
2729
2730 temp = ntohl(*buf);
2731 buf++;
2732
2733 p->header.serviceId = (temp & 0xffff);
2734 p->header.spare = temp >> 16;
2735 /* Note: top 16 bits of this last word are the security checksum */
2736 }
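
/*
 * The encode and decode routines above are inverses for every field they
 * carry, so a simple sanity check (illustrative only, not part of the build)
 * is to encode a header into the wire area and decode it back:
 */
#if 0
static void
example_header_roundtrip(struct rx_packet *p)
{
    struct rx_header saved = p->header;

    rxi_EncodePacketHeader(p);  /* host order -> network order wire words */
    rxi_DecodePacketHeader(p);  /* network order wire words -> host order */

    osi_Assert(p->header.epoch == saved.epoch);
    osi_Assert(p->header.cid == saved.cid);
    osi_Assert(p->header.callNumber == saved.callNumber);
    osi_Assert(p->header.seq == saved.seq);
    osi_Assert(p->header.serial == saved.serial);
    osi_Assert(p->header.serviceId == saved.serviceId);
}
#endif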
2737
2738 /*
2739 * LOCKS HELD: called with call->lock held.
2740 *
2741 * PrepareSendPacket is the only place in the code that
2742 * can increment call->tnext. This could become an atomic
2743 * in the future. Beyond that there is nothing in this
2744 * function that requires the call to be locked. This
2745 * function can only be called by the application thread.
2746 */
2747 void
2748 rxi_PrepareSendPacket(struct rx_call *call,
2749 struct rx_packet *p, int last)
2750 {
2751 struct rx_connection *conn = call->conn;
2752 afs_uint32 seq = call->tnext++;
2753 unsigned int i;
2754 afs_int32 len; /* len must be a signed type; it can go negative */
2755 int code;
2756
2757 /* No data packets on call 0. Where do these come from? */
2758 if (*call->callNumber == 0)
2759 *call->callNumber = 1;
2760
2761 MUTEX_EXIT(&call->lock);
2762 p->flags &= ~(RX_PKTFLAG_ACKED | RX_PKTFLAG_SENT);
2763
2764 p->header.cid = (conn->cid | call->channel);
2765 p->header.serviceId = conn->serviceId;
2766 p->header.securityIndex = conn->securityIndex;
2767
2768 p->header.callNumber = *call->callNumber;
2769 p->header.seq = seq;
2770 p->header.epoch = conn->epoch;
2771 p->header.type = RX_PACKET_TYPE_DATA;
2772 p->header.userStatus = 0;
2773 p->header.flags = 0;
2774 p->header.spare = 0;
2775 if (conn->type == RX_CLIENT_CONNECTION)
2776 p->header.flags |= RX_CLIENT_INITIATED;
2777
2778 if (last)
2779 p->header.flags |= RX_LAST_PACKET;
2780
2781 clock_Zero(&p->firstSent); /* Never yet transmitted */
2782 p->header.serial = 0; /* Another way of saying never transmitted... */
2783
2784 /* Now that we're sure this is the last data on the call, make sure
2785 * that the "length" and the sum of the iov_lens match. */
2786 len = p->length + call->conn->securityHeaderSize;
2787
2788 for (i = 1; i < p->niovecs && len > 0; i++) {
2789 len -= p->wirevec[i].iov_len;
2790 }
2791 if (len > 0) {
2792 osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */
2793 } else if (i < p->niovecs) {
2794 /* Free any extra elements in the wirevec */
2795 #if defined(RX_ENABLE_TSFPQ)
2796 rxi_FreeDataBufsTSFPQ(p, i, 1 /* allow global pool flush if overquota */);
2797 #else /* !RX_ENABLE_TSFPQ */
2798 MUTEX_ENTER(&rx_freePktQ_lock);
2799 rxi_FreeDataBufsNoLock(p, i);
2800 MUTEX_EXIT(&rx_freePktQ_lock);
2801 #endif /* !RX_ENABLE_TSFPQ */
2802
2803 p->niovecs = i;
2804 }
2805 if (len)
2806 p->wirevec[i - 1].iov_len += len;
2807 MUTEX_ENTER(&call->lock);
2808 code = RXS_PreparePacket(conn->securityObject, call, p);
2809 if (code) {
2810 MUTEX_EXIT(&call->lock);
2811 rxi_ConnectionError(conn, code);
2812 MUTEX_ENTER(&conn->conn_data_lock);
2813 p = rxi_SendConnectionAbort(conn, p, 0, 0);
2814 MUTEX_EXIT(&conn->conn_data_lock);
2815 MUTEX_ENTER(&call->lock);
2816 /* setting a connection error means all calls for that conn are also
2817 * error'd. if this call does not have an error by now, something is
2818 * very wrong, and we risk sending data in the clear that is supposed
2819 * to be encrypted. */
2820 osi_Assert(call->error);
2821 }
2822 }
2823
2824 /* Given an interface MTU size, calculate an adjusted MTU size that
2825 * will make efficient use of the RX buffers when the peer is sending
2826 * either AFS 3.4a jumbograms or AFS 3.5 jumbograms. */
2827 int
2828 rxi_AdjustIfMTU(int mtu)
2829 {
2830 int adjMTU;
2831 int frags;
2832
2833 if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
2834 return mtu;
2835 adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
2836 if (mtu <= adjMTU) {
2837 return mtu;
2838 }
2839 mtu -= adjMTU;
2840 if (mtu <= 0) {
2841 return adjMTU;
2842 }
2843 frags = mtu / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE);
2844 return (adjMTU + (frags * (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2845 }
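
/*
 * A worked example, assuming the commonly used constant values
 * RX_HEADER_SIZE == 28, RX_JUMBOBUFFERSIZE == 1412 and
 * RX_JUMBOHEADERSIZE == 4 (check rx_packet.h for the values actually in
 * effect): with jumbograms enabled and an interface MTU of 1500,
 * adjMTU = 28 + 1412 + 4 = 1444; the remaining 56 bytes cannot hold another
 * 1416-byte fragment, so the adjusted MTU is 1444.  Any MTU of adjMTU or
 * less is returned unchanged.
 */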
2846
2847 /* Given an interface MTU size, and the peer's advertised max receive
2848 * size, calculate an adjusted maxMTU size that makes efficient use
2849 * of our packet buffers when we are sending AFS 3.4a jumbograms. */
2850 int
2851 rxi_AdjustMaxMTU(int mtu, int peerMaxMTU)
2852 {
2853 int maxMTU = mtu * rxi_nSendFrags;
2854 maxMTU = MIN(maxMTU, peerMaxMTU);
2855 return rxi_AdjustIfMTU(maxMTU);
2856 }
2857
2858 /* Given a packet size, figure out how many datagram packets will fit.
2859 * The first buffer always contains RX_HEADER_SIZE+RX_JUMBOBUFFERSIZE+
2860 * RX_JUMBOHEADERSIZE, the middle buffers contain RX_JUMBOBUFFERSIZE+
2861 * RX_JUMBOHEADERSIZE, and the last buffer contains RX_JUMBOBUFFERSIZE */
2862 int
2863 rxi_AdjustDgramPackets(int frags, int mtu)
2864 {
2865 int maxMTU;
2866 if (mtu + IPv6_FRAG_HDR_SIZE < RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE) {
2867 return 1;
2868 }
2869 maxMTU = (frags * (mtu + UDP_HDR_SIZE)) - UDP_HDR_SIZE;
2870 maxMTU = MIN(maxMTU, RX_MAX_PACKET_SIZE);
2871 /* subtract the size of the first and last packets */
2872 maxMTU -= RX_HEADER_SIZE + (2 * RX_JUMBOBUFFERSIZE) + RX_JUMBOHEADERSIZE;
2873 if (maxMTU < 0) {
2874 return 1;
2875 }
2876 return (2 + (maxMTU / (RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE)));
2877 }
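
/*
 * A worked example under the same assumed constants as above, plus
 * UDP_HDR_SIZE == 28 for the IP and UDP headers (again, verify against the
 * headers in use): with frags == 3 and mtu == 1444, the datagram can carry
 * 3 * (1444 + 28) - 28 = 4388 bytes; subtracting the first/last packet
 * overhead of 28 + 2 * 1412 + 4 = 2856 leaves 1532 bytes, enough for one
 * more 1416-byte middle buffer, so the function returns 2 + 1 = 3 packets
 * per datagram.
 */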
2878
2879 #ifndef KERNEL
2880 /*
2881 * This function can be used by the Windows Cache Manager
2882 * to dump the list of all rx packets so that we can determine
2883 * where the packet leakage is.
2884 */
2885 int rx_DumpPackets(FILE *outputFile, char *cookie)
2886 {
2887 #ifdef RXDEBUG_PACKET
2888 struct rx_packet *p;
2889 #ifdef AFS_NT40_ENV
2890 int zilch;
2891 char output[2048];
2892 #define RXDPRINTF sprintf
2893 #define RXDPRINTOUT output
2894 #else
2895 #define RXDPRINTF fprintf
2896 #define RXDPRINTOUT outputFile
2897 #endif
2898
2899 NETPRI;
2900 MUTEX_ENTER(&rx_freePktQ_lock);
2901 RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Packets - count=%u\r\n", cookie, rx_packet_id);
2902 #ifdef AFS_NT40_ENV
2903 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2904 #endif
2905
2906 for (p = rx_mallocedP; p; p = p->allNextp) {
2907 RXDPRINTF(RXDPRINTOUT, "%s - packet=0x%p, id=%u, firstSent=%u.%08u, timeSent=%u.%08u, firstSerial=%u, niovecs=%u, flags=0x%x, length=%u header: epoch=%u, cid=%u, callNum=%u, seq=%u, serial=%u, type=%u, flags=0x%x, userStatus=%u, securityIndex=%u, serviceId=%u\r\n",
2908 cookie, p, p->packetId, p->firstSent.sec, p->firstSent.usec, p->timeSent.sec, p->timeSent.usec,
2909 p->firstSerial, p->niovecs, (afs_uint32)p->flags, (afs_uint32)p->length,
2910 p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.serial,
2911 (afs_uint32)p->header.type, (afs_uint32)p->header.flags, (afs_uint32)p->header.userStatus,
2912 (afs_uint32)p->header.securityIndex, (afs_uint32)p->header.serviceId);
2913 #ifdef AFS_NT40_ENV
2914 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2915 #endif
2916 }
2917
2918 RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Packets\r\n", cookie);
2919 #ifdef AFS_NT40_ENV
2920 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2921 #endif
2922
2923 MUTEX_EXIT(&rx_freePktQ_lock);
2924 USERPRI;
2925 #endif /* RXDEBUG_PACKET */
2926 return 0;
2927 }
2928 #endif