Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / afs / afs_analyze.c
1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10 /*
11 * Implements:
12 */
13 #include <afsconfig.h>
14 #include "afs/param.h"
15
16
17 #include "afs/stds.h"
18 #include "afs/sysincludes.h" /* Standard vendor system headers */
19
20 #ifndef UKERNEL
21 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV)
22 #include <net/if.h>
23 #include <netinet/in.h>
24 #endif
25
26 #ifdef AFS_SGI62_ENV
27 #include "h/hashing.h"
28 #endif
29 #if !defined(AFS_HPUX110_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV) && !defined(AFS_DARWIN_ENV)
30 #include <netinet/in_var.h>
31 #endif
32 #endif /* !UKERNEL */
33
34 #include "afsincludes.h" /* Afs-based standard headers */
35 #include "afs/afs_stats.h" /* afs statistics */
36 #include "afs/afs_util.h"
37 #include "afs/unified_afs.h"
38
39 #if defined(AFS_SUN5_ENV)
40 #include <inet/led.h>
41 #include <inet/common.h>
42 #include <netinet/ip6.h>
43 #include <inet/ip.h>
44 #endif
45
46 /* shouldn't do it this way, but for now will do */
47 #ifndef ERROR_TABLE_BASE_U
48 #define ERROR_TABLE_BASE_U (5376L)
49 #endif /* ubik error base define */
50
51 /* shouldn't do it this way, but for now will do */
52 #ifndef ERROR_TABLE_BASE_uae
53 #define ERROR_TABLE_BASE_uae (49733376L)
54 #endif /* unified afs error base define */
55
56 /* same hack for vlserver error base as for ubik error base */
57 #ifndef ERROR_TABLE_BASE_VL
58 #define ERROR_TABLE_BASE_VL (363520L)
59 #define VL_NOENT (363524L)
60 #endif /* vlserver error base define */
61
62
63 int afs_BusyWaitPeriod = 15; /**< poll period, in seconds */
64
65 afs_int32 hm_retry_RO = 0; /**< enable read-only hard-mount retry */
66 afs_int32 hm_retry_RW = 0; /**< enable read-write hard-mount retry */
67 afs_int32 hm_retry_int = 0; /**< hard-mount retry interval, in seconds */
68
69 #define VSleep(at) afs_osi_Wait((at)*1000, 0, 0)
70
71
/**
 * Return code of the most recent vlserver lookup performed by VLDB_Same()
 * on its capability-probing path (the branch that starts with
 * VL_GetEntryByNameU).  Kept as a global for debugging.
 */
int lastcode;

/** Result values of VLDB_Same(). */
enum {
    DIFFERENT = 0,	/**< vldb record differs from the cached one */
    SAME = 1,		/**< vldb record matches the cached one */
    DUNNO = 2		/**< could not determine either way */
};
76 /*!
77 * \brief
78 * Request vldb record to determined if it has changed.
79 *
80 * \retval 0 if the vldb record for a specific volume is different from what
81 * we have cached -- perhaps the volume has moved.
82 * \retval 1 if the vldb record is the same
83 * \retval 2 if we can't tell if it's the same or not.
84 *
85 * \note
86 * If 0 returned, the caller will probably start over at the beginning of our
87 * list of servers for this volume and try to find one that is up. If
88 * not 0, we will probably just keep plugging with what we have
89 * cached. If we fail to contact the VL server, we should just keep
90 * trying with the information we have, rather than failing.
91 */
92 static int
93 VLDB_Same(struct VenusFid *afid, struct vrequest *areq)
94 {
95 struct vrequest *treq = NULL;
96 struct afs_conn *tconn;
97 int i, type = 0;
98 union {
99 struct vldbentry tve;
100 struct nvldbentry ntve;
101 struct uvldbentry utve;
102 } *v;
103 struct volume *tvp;
104 struct cell *tcell;
105 char *bp, tbuf[CVBS]; /* biggest volume id is 2^32, ~ 4*10^9 */
106 unsigned int changed;
107 struct server *(oldhosts[NMAXNSERVERS]);
108 struct rx_connection *rxconn;
109
110 AFS_STATCNT(CheckVLDB);
111 afs_FinalizeReq(areq);
112
113 if ((i = afs_CreateReq(&treq, afs_osi_credp)))
114 return DUNNO;
115 v = afs_osi_Alloc(sizeof(*v));
116 osi_Assert(v != NULL);
117 tcell = afs_GetCell(afid->Cell, READ_LOCK);
118 bp = afs_cv2string(&tbuf[CVBS], afid->Fid.Volume);
119 do {
120 VSleep(2); /* Better safe than sorry. */
121 tconn =
122 afs_ConnByMHosts(tcell->cellHosts, tcell->vlport, tcell->cellNum,
123 treq, SHARED_LOCK, 0, &rxconn);
124 if (tconn) {
125 if ( tconn->parent->srvr->server->flags & SNO_LHOSTS) {
126 type = 0;
127 RX_AFS_GUNLOCK();
128 i = VL_GetEntryByNameO(rxconn, bp, &v->tve);
129 RX_AFS_GLOCK();
130 } else if (tconn->parent->srvr->server->flags & SYES_LHOSTS) {
131 type = 1;
132 RX_AFS_GUNLOCK();
133 i = VL_GetEntryByNameN(rxconn, bp, &v->ntve);
134 RX_AFS_GLOCK();
135 } else {
136 type = 2;
137 RX_AFS_GUNLOCK();
138 i = VL_GetEntryByNameU(rxconn, bp, &v->utve);
139 RX_AFS_GLOCK();
140 if (!(tconn->parent->srvr->server->flags & SVLSRV_UUID)) {
141 if (i == RXGEN_OPCODE) {
142 type = 1;
143 RX_AFS_GUNLOCK();
144 i = VL_GetEntryByNameN(rxconn, bp, &v->ntve);
145 RX_AFS_GLOCK();
146 if (i == RXGEN_OPCODE) {
147 type = 0;
148 tconn->parent->srvr->server->flags |= SNO_LHOSTS;
149 RX_AFS_GUNLOCK();
150 i = VL_GetEntryByNameO(rxconn, bp, &v->tve);
151 RX_AFS_GLOCK();
152 } else if (!i)
153 tconn->parent->srvr->server->flags |= SYES_LHOSTS;
154 } else if (!i)
155 tconn->parent->srvr->server->flags |= SVLSRV_UUID;
156 }
157 lastcode = i;
158 }
159 } else
160 i = -1;
161 } while (afs_Analyze(tconn, rxconn, i, NULL, treq, -1, /* no op code for this */
162 SHARED_LOCK, tcell));
163
164 afs_PutCell(tcell, READ_LOCK);
165 afs_Trace2(afs_iclSetp, CM_TRACE_CHECKVLDB, ICL_TYPE_FID, &afid,
166 ICL_TYPE_INT32, i);
167
168 if (i) {
169 afs_DestroyReq(treq);
170 afs_osi_Free(v, sizeof(*v));
171 return DUNNO;
172 }
173 /* have info, copy into serverHost array */
174 changed = 0;
175 tvp = afs_FindVolume(afid, WRITE_LOCK);
176 if (tvp) {
177 ObtainWriteLock(&tvp->lock, 107);
178 for (i = 0; i < NMAXNSERVERS && tvp->serverHost[i]; i++) {
179 oldhosts[i] = tvp->serverHost[i];
180 }
181 ReleaseWriteLock(&tvp->lock);
182
183 if (type == 2) {
184 LockAndInstallUVolumeEntry(tvp, &v->utve, afid->Cell, tcell, treq);
185 } else if (type == 1) {
186 LockAndInstallNVolumeEntry(tvp, &v->ntve, afid->Cell);
187 } else {
188 LockAndInstallVolumeEntry(tvp, &v->tve, afid->Cell);
189 }
190
191 if (i < NMAXNSERVERS && tvp->serverHost[i]) {
192 changed = 1;
193 }
194 for (--i; !changed && i >= 0; i--) {
195 if (tvp->serverHost[i] != oldhosts[i]) {
196 changed = 1; /* also happens if prefs change. big deal. */
197 }
198 }
199
200 tvp->states &= ~VRecheck; /* Just checked it. */
201 tvp->setupTime = osi_Time(); /* Time the vldb was checked. */
202
203 ReleaseWriteLock(&tvp->lock);
204 afs_PutVolume(tvp, WRITE_LOCK);
205 } else { /* can't find volume */
206 tvp = afs_GetVolume(afid, treq, WRITE_LOCK);
207 if (tvp) {
208 afs_PutVolume(tvp, WRITE_LOCK);
209 afs_DestroyReq(treq);
210 afs_osi_Free(v, sizeof(*v));
211 return DIFFERENT;
212 } else {
213 afs_DestroyReq(treq);
214 afs_osi_Free(v, sizeof(*v));
215 return DUNNO;
216 }
217 }
218
219 afs_DestroyReq(treq);
220 afs_osi_Free(v, sizeof(*v));
221 return (changed ? DIFFERENT : SAME);
222 } /*VLDB_Same */
223
224 /*!
225 * \brief
226 * Mark a server as invalid for further attempts of this request only.
227 *
228 * \param[in,out] areq The request record associated with this operation.
229 * \param[in] afid The FID of the file involved in the action. This argument
230 * may be null if none was involved.
231 * \param[in,out] tsp pointer to a server struct for the server we wish to
232 * blacklist.
233 *
234 * \returns
235 * Non-zero value if further servers are available to try,
236 * zero otherwise.
237 *
238 * \note
239 * This routine is typically called in situations where we believe
240 * one server out of a pool may have an error condition.
241 *
242 * \note
243 * The afs_Conn* routines use the list of invalidated servers to
244 * avoid reusing a server marked as invalid for this request.
245 */
246 static afs_int32
247 afs_BlackListOnce(struct vrequest *areq, struct VenusFid *afid,
248 struct server *tsp)
249 {
250 struct volume *tvp;
251 afs_int32 i;
252 afs_int32 serversleft = 0;
253
254 if (afid) {
255 tvp = afs_FindVolume(afid, READ_LOCK);
256 if (tvp) {
257 for (i = 0; i < AFS_MAXHOSTS; i++) {
258 if (tvp->serverHost[i] == tsp) {
259 areq->skipserver[i] = 1;
260 }
261 if (tvp->serverHost[i] &&
262 (tvp->serverHost[i]->addr->sa_flags &
263 SRVR_ISDOWN)) {
264 areq->skipserver[i] = 1;
265 }
266 }
267 for (i = 0; i < AFS_MAXHOSTS; i++) {
268 if (tvp->serverHost[i] && areq->skipserver[i] == 0) {
269 serversleft = 1;
270 break;
271 }
272 }
273 afs_PutVolume(tvp, READ_LOCK);
274 return serversleft;
275 }
276 }
277 return serversleft;
278 }
279
280 /*!
281 * \brief
282 * Clear any cached status for the target FID of a failed fileserver
283 * write RPC.
284 *
285 * \param[in] afid The FID of the file involved in the action. This argument
286 * may be null if none was involved.
287 * \param[in] op which RPC we are analyzing.
288 * \param[in,out] avp A pointer to the struct volume, if we already have one.
289 *
290 * \returns
291 * Non-zero value if the related RPC operation can be retried,
292 * zero otherwise.
293 *
294 * \note
295 * This routine is called when we got a network error,
296 * and discards state if the operation was a data-mutating
297 * operation.
298 */
299 static int
300 afs_ClearStatus(struct VenusFid *afid, int op, struct volume *avp)
301 {
302 struct volume *tvp = NULL;
303
304 /* if it's not a write op, we have nothing to veto and shouldn't clear. */
305 if (!AFS_STATS_FS_RPCIDXES_ISWRITE(op)) {
306 return 1;
307 }
308
309 if (avp)
310 tvp = avp;
311 else if (afid)
312 tvp = afs_FindVolume(afid, READ_LOCK);
313
314 /* don't assume just discarding will fix if no cached volume */
315 if (tvp) {
316 struct vcache *tvc;
317 ObtainReadLock(&afs_xvcache);
318 if ((tvc = afs_FindVCache(afid, 0, 0))) {
319 ReleaseReadLock(&afs_xvcache);
320 afs_StaleVCacheFlags(tvc, AFS_STALEVC_NOCB | AFS_STALEVC_NODNLC,
321 CUnique);
322 afs_PutVCache(tvc);
323 } else {
324 ReleaseReadLock(&afs_xvcache);
325 }
326 if (!avp)
327 afs_PutVolume(tvp, READ_LOCK);
328 }
329
330 if (AFS_STATS_FS_RPCIDXES_WRITE_RETRIABLE(op))
331 return 1;
332
333 /* not retriable: we may have raced ourselves */
334 return 0;
335 }
336
337 /*!
338 * \brief
339 * Print the last errors from the servers for the volume on
340 * this request.
341 *
342 * \param[in] areq The request record associated with this operation.
343 * \param[in] afid The FID of the file involved in the action. This argument
344 * may be null if none was involved.
345 *
346 * \return
347 * None
348 *
349 * \note
350 * This routine is called before a hard-mount retry, to display
351 * the servers by primary address and the errors encountered.
352 */
353 static void
354 afs_PrintServerErrors(struct vrequest *areq, struct VenusFid *afid)
355 {
356 int i;
357 struct volume *tvp;
358 struct srvAddr *sa;
359 afs_uint32 address;
360 char *sep = " (";
361 char *term = "";
362
363 if (afid) {
364 tvp = afs_FindVolume(afid, READ_LOCK);
365 if (tvp) {
366 for (i = 0; i < AFS_MAXHOSTS; i++) {
367 if (areq->lasterror[i] && tvp->serverHost[i]) {
368 sa = tvp->serverHost[i]->addr;
369 if (sa) {
370 address = ntohl(sa->sa_ip);
371 afs_warnuser("%s%d.%d.%d.%d code=%d", sep,
372 (address >> 24), (address >> 16) & 0xff,
373 (address >> 8) & 0xff, (address) & 0xff,
374 areq->lasterror[i]);
375 sep = ", ";
376 term = ")";
377 }
378 }
379 }
380 afs_PutVolume(tvp, READ_LOCK);
381 }
382 }
383 afs_warnuser("%s\n", term);
384 }
385
386 /*!
387 * \brief
388 * Analyze the outcome of an RPC operation, taking whatever support
389 * actions are necessary.
390 *
391 * \param[in] aconn Ptr to the relevant connection on which the call was made.
392 * \param[in] rxconn Ptr to the rx_connection.
393 * \param[in] acode The return code experienced by the RPC.
394 * \param[in] fid The FID of the file involved in the action. This argument
395 * may be null if none was involved.
396 * \param[in,out] areq The request record associated with this operation.
397 * \param[in] op which RPC we are analyzing.
398 * \param[in] cellp pointer to a cell struct. Must provide either fid or cell.
399 *
400 * \returns
401 * Non-zero value if the related RPC operation should be retried,
402 * zero otherwise.
403 *
404 * \note
405 * This routine is typically called in a do-while loop, causing the
406 * embedded RPC operation to be called repeatedly if appropriate
407 * until whatever error condition (if any) is intolerable.
408 *
409 * \note
410 * The retry return value is used by afs_StoreAllSegments to determine
411 * if this is a temporary or permanent error.
412 */
413 int
414 afs_Analyze(struct afs_conn *aconn, struct rx_connection *rxconn,
415 afs_int32 acode, struct VenusFid *afid, struct vrequest *areq,
416 int op, afs_int32 locktype, struct cell *cellp)
417 {
418 afs_int32 i;
419 struct srvAddr *sa;
420 struct server *tsp;
421 struct volume *tvp = NULL;
422 afs_int32 shouldRetry = 0;
423 afs_int32 serversleft = 1;
424 struct afs_stats_RPCErrors *aerrP;
425 afs_uint32 address;
426
427 if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) {
428 /* On reconnection, act as connected. XXX: for now.... */
429 /* SXW - This may get very tired after a while. We should try and
430 * intercept all RPCs before they get here ... */
431 /*printf("afs_Analyze: disconnected\n");*/
432 afs_FinalizeReq(areq);
433 if (aconn) {
434 /* SXW - I suspect that this will _never_ happen - we shouldn't
435 * get a connection because we're disconnected !!!*/
436 afs_PutConn(aconn, rxconn, locktype);
437 }
438 return 0;
439 }
440
441 AFS_STATCNT(afs_Analyze);
442 afs_Trace4(afs_iclSetp, CM_TRACE_ANALYZE, ICL_TYPE_INT32, op,
443 ICL_TYPE_POINTER, aconn, ICL_TYPE_INT32, acode, ICL_TYPE_LONG,
444 areq->uid);
445
446 aerrP = (struct afs_stats_RPCErrors *)0;
447
448 if ((op >= 0) && (op < AFS_STATS_NUM_FS_RPC_OPS))
449 aerrP = &(afs_stats_cmfullperf.rpc.fsRPCErrors[op]);
450
451 afs_FinalizeReq(areq);
452 if (!aconn && areq->busyCount) { /* one RPC or more got VBUSY/VRESTARTING */
453
454 tvp = afs_FindVolume(afid, READ_LOCK);
455 if (tvp) {
456 afs_warnuser("afs: Waiting for busy volume %u (%s) in cell %s\n",
457 (afid ? afid->Fid.Volume : 0),
458 (tvp->name ? tvp->name : ""),
459 ((tvp->serverHost[0]
460 && tvp->serverHost[0]->cell) ? tvp->serverHost[0]->
461 cell->cellName : ""));
462
463 for (i = 0; i < AFS_MAXHOSTS; i++) {
464 if (tvp->status[i] != not_busy && tvp->status[i] != offline) {
465 tvp->status[i] = not_busy;
466 }
467 if (tvp->status[i] == not_busy)
468 shouldRetry = 1;
469 }
470 afs_PutVolume(tvp, READ_LOCK);
471 } else {
472 afs_warnuser("afs: Waiting for busy volume %u\n",
473 (afid ? afid->Fid.Volume : 0));
474 }
475
476 if (areq->busyCount > 100) {
477 if (aerrP)
478 (aerrP->err_Volume)++;
479 areq->volumeError = VOLBUSY;
480 shouldRetry = 0;
481 } else {
482 VSleep(afs_BusyWaitPeriod); /* poll periodically */
483 }
484 if (shouldRetry != 0)
485 areq->busyCount++;
486
487 return shouldRetry; /* should retry */
488 }
489
490 if (!aconn || !aconn->parent->srvr) {
491 if (!areq->volumeError) {
492 if (aerrP)
493 (aerrP->err_Network)++;
494 if (hm_retry_int && !(areq->flags & O_NONBLOCK) && /* "hard" mount */
495 ((afid && afs_IsPrimaryCellNum(afid->Cell))
496 || (cellp && afs_IsPrimaryCell(cellp)))) {
497 if (!afid) {
498 static int afs_vl_hm = 0;
499 int warn = 0;
500 if (!afs_vl_hm) {
501 afs_vl_hm = warn = 1;
502 }
503 if (warn) {
504 afs_warnuser
505 ("afs: hard-mount waiting for a vlserver to return to service\n");
506 }
507 VSleep(hm_retry_int);
508 afs_CheckServers(1, cellp);
509 shouldRetry = 1;
510
511 if (warn) {
512 afs_vl_hm = 0;
513 }
514 } else {
515 static int afs_unknown_vhm = 0;
516 int warn = 0, vp_vhm = 0;
517
518 tvp = afs_FindVolume(afid, READ_LOCK);
519 if (!tvp || (tvp->states & VRO)) {
520 shouldRetry = hm_retry_RO;
521 } else {
522 shouldRetry = hm_retry_RW;
523 }
524
525 /* Set 'warn' if we should afs_warnuser. Only let one
526 * caller call afs_warnuser per hm_retry_int interval per
527 * volume. */
528 if (shouldRetry) {
529 if (tvp) {
530 if (!(tvp->states & VHardMount)) {
531 tvp->states |= VHardMount;
532 warn = vp_vhm = 1;
533 }
534 } else {
535 if (!afs_unknown_vhm) {
536 afs_unknown_vhm = 1;
537 warn = 1;
538 }
539 }
540 }
541
542 if (tvp)
543 afs_PutVolume(tvp, READ_LOCK);
544
545 if (shouldRetry) {
546 if (warn) {
547 afs_warnuser
548 ("afs: hard-mount waiting for volume %u",
549 afid->Fid.Volume);
550 afs_PrintServerErrors(areq, afid);
551 }
552
553 VSleep(hm_retry_int);
554 afs_CheckServers(1, cellp);
555 /* clear the black listed servers on this request. */
556 memset(areq->skipserver, 0, sizeof(areq->skipserver));
557
558 if (vp_vhm) {
559 tvp = afs_FindVolume(afid, READ_LOCK);
560 if (tvp) {
561 tvp->states &= ~VHardMount;
562 afs_PutVolume(tvp, READ_LOCK);
563 }
564 } else if (warn) {
565 afs_unknown_vhm = 0;
566 }
567 }
568 }
569 } /* if (hm_retry_int ... */
570 else {
571 if (acode == RX_MSGSIZE)
572 shouldRetry = 1;
573 else {
574 areq->networkError = 1;
575 /* do not promote to shouldRetry if not already */
576 if (afs_ClearStatus(afid, op, NULL) == 0)
577 shouldRetry = 0;
578 }
579 }
580 }
581 if (aconn) /* simply lacking aconn->server doesn't absolve this */
582 afs_PutConn(aconn, rxconn, locktype);
583 return shouldRetry;
584 }
585
586 /* Find server associated with this connection. */
587 sa = aconn->parent->srvr;
588 tsp = sa->server;
589 address = ntohl(sa->sa_ip);
590
591 /* Before we do anything with acode, make sure we translate it back to
592 * a system error */
593 if ((acode & ~0xff) == ERROR_TABLE_BASE_uae)
594 acode = et_to_sys_error(acode);
595
596 if (acode == 0) {
597 /* If we previously took an error, mark this volume not busy */
598 if (areq->volumeError) {
599 tvp = afs_FindVolume(afid, READ_LOCK);
600 if (tvp) {
601 for (i = 0; i < AFS_MAXHOSTS; i++) {
602 if (tvp->serverHost[i] == tsp) {
603 tvp->status[i] = not_busy;
604 }
605 }
606 afs_PutVolume(tvp, READ_LOCK);
607 }
608 }
609
610 afs_PutConn(aconn, rxconn, locktype);
611 return 0;
612 }
613
614 /* Save the last code of this server on this request. */
615 tvp = afs_FindVolume(afid, READ_LOCK);
616 if (tvp) {
617 for (i = 0; i < AFS_MAXHOSTS; i++) {
618 if (tvp->serverHost[i] == tsp) {
619 areq->lasterror[i] = acode;
620 }
621 }
622 afs_PutVolume(tvp, READ_LOCK);
623 }
624
625 #ifdef AFS_64BIT_CLIENT
626 if (acode == -455)
627 acode = 455;
628 #endif /* AFS_64BIT_CLIENT */
629 if (acode == RX_MSGSIZE) {
630 shouldRetry = 1;
631 goto out;
632 }
633 if (acode == RX_CALL_TIMEOUT || acode == VNOSERVICE) {
634 serversleft = afs_BlackListOnce(areq, afid, tsp);
635 if (afid)
636 tvp = afs_FindVolume(afid, READ_LOCK);
637 if ((serversleft == 0) && tvp &&
638 ((tvp->states & VRO) || (tvp->states & VBackup))) {
639 shouldRetry = 0;
640 } else {
641 shouldRetry = 1;
642 }
643 if (!afid || !tvp || (tvp->states & VRO))
644 areq->idleError++;
645 else if (afs_ClearStatus(afid, op, tvp) == 0)
646 shouldRetry = 0;
647
648 if (tvp)
649 afs_PutVolume(tvp, READ_LOCK);
650 /* By doing this, we avoid ever marking a server down
651 * in an idle timeout case. That's because the server is
652 * still responding and may only be letting a single vnode
653 * time out. We otherwise risk having the server continually
654 * be marked down, then up, then down again...
655 */
656 goto out;
657 }
658 /* If network troubles, mark server as having bogued out again. */
659 /* VRESTARTING is < 0 because of backward compatibility issues
660 * with 3.4 file servers and older cache managers */
661 if ((acode < 0) && (acode != VRESTARTING)) {
662 afs_ServerDown(sa, acode, rxconn);
663 ForceNewConnections(sa); /* multi homed clients lock:afs_xsrvAddr? */
664 if (aerrP)
665 (aerrP->err_Server)++;
666 }
667
668 if (acode == VBUSY || acode == VRESTARTING) {
669 if (acode == VBUSY) {
670 areq->busyCount++;
671 if (aerrP)
672 (aerrP->err_VolumeBusies)++;
673 } else
674 areq->busyCount = 1;
675
676 tvp = afs_FindVolume(afid, READ_LOCK);
677 if (tvp) {
678 for (i = 0; i < AFS_MAXHOSTS; i++) {
679 if (tvp->serverHost[i] == tsp) {
680 tvp->status[i] = rdwr_busy; /* can't tell which yet */
681 /* to tell which, have to look at the op code. */
682 }
683 }
684 afs_PutVolume(tvp, READ_LOCK);
685 } else {
686 afs_warnuser("afs: Waiting for busy volume %u in cell %s (server %d.%d.%d.%d)\n",
687 (afid ? afid->Fid.Volume : 0), tsp->cell->cellName,
688 (address >> 24), (address >> 16) & 0xff,
689 (address >> 8) & 0xff, (address) & 0xff);
690 VSleep(afs_BusyWaitPeriod); /* poll periodically */
691 }
692 shouldRetry = 1;
693 acode = 0;
694 } else if (acode == VICETOKENDEAD
695 || (acode & ~0xff) == ERROR_TABLE_BASE_RXK) {
696 /* any rxkad error is treated as token expiration */
697 struct unixuser *tu;
698 /*
699 * I'm calling these errors protection errors, since they involve
700 * faulty authentication.
701 */
702 if (aerrP)
703 (aerrP->err_Protection)++;
704
705 tu = afs_FindUser(areq->uid, tsp->cell->cellNum, READ_LOCK);
706 if (tu) {
707 if (acode == VICETOKENDEAD) {
708 aconn->forceConnectFS = 1;
709 } else if (acode == RXKADEXPIRED) {
710 aconn->forceConnectFS = 0; /* don't check until new tokens set */
711 aconn->parent->user->states |= UTokensBad;
712 afs_NotifyUser(tu, UTokensDropped);
713 afs_warnuser
714 ("afs: Tokens for user of AFS id %d for cell %s have expired (server %d.%d.%d.%d)\n",
715 tu->viceId, aconn->parent->srvr->server->cell->cellName,
716 (address >> 24), (address >> 16) & 0xff,
717 (address >> 8) & 0xff, (address) & 0xff);
718 } else {
719 serversleft = afs_BlackListOnce(areq, afid, tsp);
720 areq->tokenError++;
721
722 if (serversleft) {
723 afs_warnuser
724 ("afs: Tokens for user of AFS id %d for cell %s: rxkad error=%d (server %d.%d.%d.%d)\n",
725 tu->viceId, aconn->parent->srvr->server->cell->cellName, acode,
726 (address >> 24), (address >> 16) & 0xff,
727 (address >> 8) & 0xff, (address) & 0xff);
728 shouldRetry = 1;
729 } else {
730 areq->tokenError = 0;
731 aconn->forceConnectFS = 0; /* don't check until new tokens set */
732 aconn->parent->user->states |= UTokensBad;
733 afs_NotifyUser(tu, UTokensDropped);
734 afs_warnuser
735 ("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d, server %d.%d.%d.%d)\n",
736 tu->viceId, aconn->parent->srvr->server->cell->cellName, acode,
737 (address >> 24), (address >> 16) & 0xff,
738 (address >> 8) & 0xff, (address) & 0xff);
739 }
740 }
741 afs_PutUser(tu, READ_LOCK);
742 } else {
743 /* The else case shouldn't be possible and should probably be replaced by a panic? */
744 if (acode == VICETOKENDEAD) {
745 aconn->forceConnectFS = 1;
746 } else if (acode == RXKADEXPIRED) {
747 aconn->forceConnectFS = 0; /* don't check until new tokens set */
748 aconn->parent->user->states |= UTokensBad;
749 afs_NotifyUser(tu, UTokensDropped);
750 afs_warnuser
751 ("afs: Tokens for user %d for cell %s have expired (server %d.%d.%d.%d)\n",
752 areq->uid, aconn->parent->srvr->server->cell->cellName,
753 (address >> 24), (address >> 16) & 0xff,
754 (address >> 8) & 0xff, (address) & 0xff);
755 } else {
756 aconn->forceConnectFS = 0; /* don't check until new tokens set */
757 aconn->parent->user->states |= UTokensBad;
758 afs_NotifyUser(tu, UTokensDropped);
759 afs_warnuser
760 ("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d, server %d.%d.%d.%d)\n",
761 areq->uid, aconn->parent->srvr->server->cell->cellName,
762 acode,
763 (address >> 24), (address >> 16) & 0xff,
764 (address >> 8) & 0xff, (address) & 0xff);
765
766 }
767 }
768 shouldRetry = 1; /* Try again (as root). */
769 }
770 /* Check for access violation. */
771 else if (acode == EACCES) {
772 /* should mark access error in non-existent per-user global structure */
773 if (aerrP)
774 (aerrP->err_Protection)++;
775 areq->accessError = 1;
776 if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
777 areq->permWriteError = 1;
778 shouldRetry = 0;
779 }
780 /* check for ubik errors; treat them like crashed servers */
781 else if (acode >= ERROR_TABLE_BASE_U && acode < ERROR_TABLE_BASE_U + 255) {
782 afs_ServerDown(sa, acode, rxconn);
783 if (aerrP)
784 (aerrP->err_Server)++;
785 shouldRetry = 1; /* retryable (maybe one is working) */
786 VSleep(1); /* just in case */
787 }
788 /* Check for bad volume data base / missing volume. */
789 else if (acode == VSALVAGE || acode == VOFFLINE || acode == VNOVOL
790 || acode == VMOVED) {
791 struct cell *tcell;
792 int same;
793
794 shouldRetry = 1;
795 areq->volumeError = VOLMISSING;
796 if (aerrP)
797 (aerrP->err_Volume)++;
798 if (afid && (tcell = afs_GetCell(afid->Cell, 0))) {
799 same = VLDB_Same(afid, areq);
800 tvp = afs_FindVolume(afid, READ_LOCK);
801 if (tvp) {
802 for (i = 0; i < AFS_MAXHOSTS && tvp->serverHost[i]; i++) {
803 if (tvp->serverHost[i] == tsp) {
804 if (tvp->status[i] == end_not_busy)
805 tvp->status[i] = offline;
806 else
807 tvp->status[i]++;
808 } else if (!same) {
809 tvp->status[i] = not_busy; /* reset the others */
810 }
811 }
812 afs_PutVolume(tvp, READ_LOCK);
813 }
814 }
815 } else if (acode >= ERROR_TABLE_BASE_VL && acode <= ERROR_TABLE_BASE_VL + 255) { /* vlserver errors */
816 shouldRetry = 0;
817 areq->volumeError = VOLMISSING;
818 } else if (acode >= 0) {
819 if (aerrP)
820 (aerrP->err_Other)++;
821 if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
822 areq->permWriteError = 1;
823 shouldRetry = 0; /* Other random Vice error. */
824 } else if (acode == RX_MSGSIZE) { /* same meaning as EMSGSIZE... */
825 afs_warnuser
826 ("afs: Path MTU may have been exceeded, retrying (server %d.%d.%d.%d)\n",
827 (address >> 24), (address >> 16) & 0xff,
828 (address >> 8) & 0xff, (address) & 0xff);
829
830 VSleep(1); /* Just a hack for desperate times. */
831 if (aerrP)
832 (aerrP->err_Other)++;
833 shouldRetry = 1; /* packet was too big, please retry call */
834 }
835
836 if (acode < 0 && acode != RX_MSGSIZE && acode != VRESTARTING) {
837 /* If we get here, code < 0 and we have network/Server troubles.
838 * areq->networkError is not set here, since we always
839 * retry in case there is another server. However, if we find
840 * no connection (aconn == 0) we set the networkError flag.
841 */
842 afs_ServerDown(sa, acode, rxconn);
843 if (aerrP)
844 (aerrP->err_Server)++;
845 VSleep(1); /* Just a hack for desperate times. */
846 shouldRetry = 1;
847 }
848 out:
849 /* now unlock the connection and return */
850 afs_PutConn(aconn, rxconn, locktype);
851 return (shouldRetry);
852 } /*afs_Analyze */