Commit | Line | Data |
---|---|---|
805e021f CE |
1 | /* |
2 | * Copyright 2000, International Business Machines Corporation and others. | |
3 | * All Rights Reserved. | |
4 | * | |
5 | * This software has been released under the terms of the IBM Public | |
6 | * License. For details, see the LICENSE file in the top-level source | |
7 | * directory or online at http://www.openafs.org/dl/license10.html | |
8 | * | |
9 | * Portions Copyright (c) 2006 Sine Nomine Associates | |
10 | */ | |
11 | ||
12 | /* | |
13 | * NEW callback package callback.c (replaces vicecb.c) | |
14 | * Updated call back routines, NOW with: | |
15 | * | |
16 | * Faster DeleteVenus (Now called DeleteAllCallBacks) | |
17 | * Call back breaking for volumes | |
18 | * Adaptive timeouts on call backs | |
19 | * Architected for Multi RPC | |
20 | * No locks (currently implicit vnode locks--these will go, too) | |
21 | * Delayed call back when rpc connection down. | |
22 | * Bulk break of delayed call backs when rpc connection | |
23 | * reestablished | |
24 | * Strict limit on number of call backs. | |
25 | * | |
26 | * InitCallBack(nblocks) | |
27 | * Initialize: nblocks is max number # of file entries + # of callback entries | |
28 | * nblocks must be < 65536 | |
29 | * Space used is nblocks*16 bytes | |
30 | * Note that space will be reclaimed by breaking callbacks of old hosts | |
31 | * | |
32 | * time = AddCallBack(host, fid) | |
33 | * Add a call back. | |
34 | * Returns the expiration time at the workstation. | |
35 | * | |
36 | * BreakCallBack(host, fid) | |
37 | * Break all call backs for fid, except for the specified host. | |
38 | * Delete all of them. | |
39 | * | |
40 | * BreakVolumeCallBacksLater(volume) | |
41 | * Break all call backs on volume, using single call to each host | |
42 | * Delete all the call backs. | |
43 | * | |
44 | * DeleteCallBack(host,fid) | |
45 | * Delete (do not break) single call back for fid. | |
46 | * | |
47 | * DeleteFileCallBacks(fid) | |
48 | * Delete (do not break) all call backs for fid. | |
49 | * | |
50 | * DeleteAllCallBacks(host) | |
51 | * Delete (do not break) all call backs for host. | |
52 | * | |
53 | * CleanupTimedOutCallBacks() | |
54 | * Delete all timed out call back entries | |
55 | * Must be called periodically by file server. | |
56 | * | |
57 | * BreakDelayedCallBacks(host) | |
58 | * Break all delayed call backs for host. | |
59 | * Returns 1: one or more failed, 0: success. | |
60 | * | |
61 | * PrintCallBackStats() | |
62 | * Print statistics about call backs to stdout. | |
63 | * | |
64 | * DumpCallBacks() ---wishful thinking--- | |
65 | * Dump call back state to /tmp/callback.state. | |
66 | * This is separately interpretable by the program pcb. | |
67 | * | |
68 | * Notes: In general, if a call back to a host doesn't get through, | |
69 | * then HostDown, supplied elsewhere, is called. BreakDelayedCallBacks, | |
70 | * however, does not call HostDown, but instead returns an indication of | |
71 | * success if all delayed call backs were finally broken. | |
72 | * | |
73 | * BreakDelayedCallBacks MUST be called at the first sign of activity | |
74 | * from the host after HostDown has been called (or a previous | |
75 | * BreakDelayedCallBacks failed). The BreakDelayedCallBacks must be | |
76 | * allowed to complete before any requests from that host are handled. | |
77 | * If BreakDelayedCallBacks fails, then the host should remain | |
78 | * down (and the request should be failed). | |
79 | ||
80 | * CleanupTimedOutCallBacks MUST be called periodically by the file server for | |
81 | * this package to work correctly. Every 5 minutes is suggested. | |
82 | */ | |
83 | ||
84 | #include <afsconfig.h> | |
85 | #include <afs/param.h> | |
86 | #include <afs/stds.h> | |
87 | ||
88 | #include <roken.h> | |
89 | ||
90 | #ifdef HAVE_SYS_FILE_H | |
91 | #include <sys/file.h> | |
92 | #endif | |
93 | ||
94 | #include <afs/opr.h> | |
95 | #include <opr/lock.h> | |
96 | #include <afs/nfs.h> /* yuck. This is an abomination. */ | |
97 | #include <rx/rx.h> | |
98 | #include <rx/rx_queue.h> | |
99 | #include <afs/afscbint.h> | |
100 | #include <afs/afsutil.h> | |
101 | #include <afs/ihandle.h> | |
102 | #include <afs/partition.h> | |
103 | #include <afs/vnode.h> | |
104 | #include <afs/volume.h> | |
105 | #include "viced_prototypes.h" | |
106 | #include "viced.h" | |
107 | ||
108 | #include <afs/ptclient.h> /* need definition of prlist for host.h */ | |
109 | #include "host.h" | |
110 | #include "callback.h" | |
111 | #ifdef AFS_DEMAND_ATTACH_FS | |
112 | #include "serialize_state.h" | |
113 | #endif /* AFS_DEMAND_ATTACH_FS */ | |
114 | ||
115 | ||
116 | extern afsUUID FS_HostUUID; | |
117 | extern int hostCount; | |
118 | ||
119 | #ifndef INTERPRET_DUMP | |
120 | static int ShowProblems = 1; | |
121 | #endif | |
122 | ||
123 | struct cbcounters cbstuff; | |
124 | ||
125 | static struct FileEntry * FE = NULL; /* don't use FE[0] */ | |
126 | static struct CallBack * CB = NULL; /* don't use CB[0] */ | |
127 | ||
128 | static struct CallBack * CBfree = NULL; | |
129 | static struct FileEntry * FEfree = NULL; | |
130 | ||
131 | ||
132 | /* Time to live for call backs depends upon number of users of the file. | |
133 | * TimeOuts is indexed by this number/8 (using TimeOut macro). Times | |
134 | * in this table are for the workstation; server timeouts, add | |
135 | * ServerBias */ | |
136 | ||
137 | static int TimeOuts[] = { | |
138 | /* Note: don't make the first entry larger than 4 hours (see above) */ | |
139 | 4 * 60 * 60, /* 0-7 users */ | |
140 | 1 * 60 * 60, /* 8-15 users */ | |
141 | 30 * 60, /* 16-23 users */ | |
142 | 15 * 60, /* 24-31 users */ | |
143 | 15 * 60, /* 32-39 users */ | |
144 | 10 * 60, /* 40-47 users */ | |
145 | 10 * 60, /* 48-55 users */ | |
146 | 10 * 60, /* 56-63 users */ | |
147 | }; /* Anything more: MinTimeOut */ | |
148 | ||
149 | /* minimum time given for a call back */ | |
150 | #ifndef INTERPRET_DUMP | |
151 | static int MinTimeOut = (7 * 60); | |
152 | #endif | |
153 | ||
154 | /* Heads of CB queues; a timeout index is 1+index into this array */ | |
155 | static afs_uint32 timeout[CB_NUM_TIMEOUT_QUEUES]; | |
156 | ||
157 | static afs_int32 tfirst; /* cbtime of oldest unexpired call back time queue */ | |
158 | ||
159 | ||
160 | /* 16 byte object get/free routines */ | |
161 | struct object { | |
162 | struct object *next; | |
163 | }; | |
164 | ||
165 | /* Prototypes for static routines */ | |
166 | static struct FileEntry *FindFE(AFSFid * fid); | |
167 | ||
168 | #ifndef INTERPRET_DUMP | |
169 | static struct CallBack *iGetCB(int *nused); | |
170 | static int iFreeCB(struct CallBack *cb, int *nused); | |
171 | static struct FileEntry *iGetFE(int *nused); | |
172 | static int iFreeFE(struct FileEntry *fe, int *nused); | |
173 | static int TAdd(struct CallBack *cb, afs_uint32 * thead); | |
174 | static int TDel(struct CallBack *cb); | |
175 | static int HAdd(struct CallBack *cb, struct host *host); | |
176 | static int HDel(struct CallBack *cb); | |
177 | static int CDel(struct CallBack *cb, int deletefe); | |
178 | static int CDelPtr(struct FileEntry *fe, afs_uint32 * cbp, | |
179 | int deletefe); | |
180 | static afs_uint32 *FindCBPtr(struct FileEntry *fe, struct host *host); | |
181 | static int FDel(struct FileEntry *fe); | |
182 | static int AddCallBack1_r(struct host *host, AFSFid * fid, afs_uint32 * thead, | |
183 | int type, int locked); | |
184 | static void MultiBreakCallBack_r(struct cbstruct cba[], int ncbas, | |
185 | struct AFSCBFids *afidp); | |
186 | static int MultiBreakVolumeCallBack_r(struct host *host, | |
187 | struct VCBParams *parms, int deletefe); | |
188 | static int MultiBreakVolumeLaterCallBack(struct host *host, void *rock); | |
189 | static int GetSomeSpace_r(struct host *hostp, int locked); | |
190 | static int ClearHostCallbacks_r(struct host *hp, int locked); | |
191 | static int DumpCallBackState_r(void); | |
192 | #endif | |
193 | ||
194 | #define GetCB() ((struct CallBack *)iGetCB(&cbstuff.nCBs)) | |
195 | #define GetFE() ((struct FileEntry *)iGetFE(&cbstuff.nFEs)) | |
196 | #define FreeCB(cb) iFreeCB((struct CallBack *)cb, &cbstuff.nCBs) | |
197 | #define FreeFE(fe) iFreeFE((struct FileEntry *)fe, &cbstuff.nFEs) | |
198 | ||
199 | ||
200 | /* Other protos - move out sometime */ | |
201 | void PrintCB(struct CallBack *cb, afs_uint32 now); | |
202 | ||
203 | static afs_uint32 HashTable[FEHASH_SIZE]; /* File entry hash table */ | |
204 | ||
/*
 * Look up the FileEntry for a fid in the hash table.
 *
 * Returns the matching entry, or 0 if none exists.  Entries whose
 * status has FE_LATER set (queued for a deferred volume-wide break)
 * are deliberately skipped, so callers see only "live" entries.
 * NOTE(review): callers in this file invoke this under H_LOCK;
 * presumably that lock is required here — confirm.
 */
static struct FileEntry *
FindFE(AFSFid * fid)
{
    int hash;
    int fei;
    struct FileEntry *fe;

    hash = FEHash(fid->Volume, fid->Unique);
    /* Walk the hash chain; fe is refreshed from fei before each use. */
    for (fei = HashTable[hash]; fei; fei = fe->fnext) {
        fe = itofe(fei);
        if (fe->volid == fid->Volume && fe->unique == fid->Unique
            && fe->vnode == fid->Vnode && (fe->status & FE_LATER) != FE_LATER)
            return fe;
    }
    return 0;
}
221 | ||
222 | #ifndef INTERPRET_DUMP | |
223 | ||
224 | static struct CallBack * | |
225 | iGetCB(int *nused) | |
226 | { | |
227 | struct CallBack *ret; | |
228 | ||
229 | if ((ret = CBfree)) { | |
230 | CBfree = (struct CallBack *)(((struct object *)ret)->next); | |
231 | (*nused)++; | |
232 | } | |
233 | return ret; | |
234 | } | |
235 | ||
236 | static int | |
237 | iFreeCB(struct CallBack *cb, int *nused) | |
238 | { | |
239 | ((struct object *)cb)->next = (struct object *)CBfree; | |
240 | CBfree = cb; | |
241 | (*nused)--; | |
242 | return 0; | |
243 | } | |
244 | ||
245 | static struct FileEntry * | |
246 | iGetFE(int *nused) | |
247 | { | |
248 | struct FileEntry *ret; | |
249 | ||
250 | if ((ret = FEfree)) { | |
251 | FEfree = (struct FileEntry *)(((struct object *)ret)->next); | |
252 | (*nused)++; | |
253 | } | |
254 | return ret; | |
255 | } | |
256 | ||
257 | static int | |
258 | iFreeFE(struct FileEntry *fe, int *nused) | |
259 | { | |
260 | ((struct object *)fe)->next = (struct object *)FEfree; | |
261 | FEfree = fe; | |
262 | (*nused)--; | |
263 | return 0; | |
264 | } | |
265 | ||
266 | /* Add cb to end of specified timeout list */ | |
267 | static int | |
268 | TAdd(struct CallBack *cb, afs_uint32 * thead) | |
269 | { | |
270 | if (!*thead) { | |
271 | (*thead) = cb->tnext = cb->tprev = cbtoi(cb); | |
272 | } else { | |
273 | struct CallBack *thp = itocb(*thead); | |
274 | ||
275 | cb->tprev = thp->tprev; | |
276 | cb->tnext = *thead; | |
277 | if (thp) { | |
278 | if (thp->tprev) | |
279 | thp->tprev = (itocb(thp->tprev)->tnext = cbtoi(cb)); | |
280 | else | |
281 | thp->tprev = cbtoi(cb); | |
282 | } | |
283 | } | |
284 | cb->thead = ttoi(thead); | |
285 | return 0; | |
286 | } | |
287 | ||
/* Delete call back entry from timeout list */
static int
TDel(struct CallBack *cb)
{
    afs_uint32 *thead = itot(cb->thead);

    /* If cb is the head, advance the head — or zero it when cb is the
     * only element (its tnext points back to itself). */
    if (*thead == cbtoi(cb))
        *thead = (*thead == cb->tnext ? 0 : cb->tnext);
    /* Unlink cb from its neighbors.  The guards skip the updates when
     * itocb() yields NULL — presumably index 0 maps to NULL since
     * CB[0] is unused; confirm against the itocb definition. */
    if (itocb(cb->tprev))
        itocb(cb->tprev)->tnext = cb->tnext;
    if (itocb(cb->tnext))
        itocb(cb->tnext)->tprev = cb->tprev;
    return 0;
}
302 | ||
/* Add cb to end of specified host list */
static int
HAdd(struct CallBack *cb, struct host *host)
{
    /* Record the owning host so HDel can find the list head later. */
    cb->hhead = h_htoi(host);
    if (!host->z.cblist) {
        /* Empty list: cb links to itself and becomes the head. */
        host->z.cblist = cb->hnext = cb->hprev = cbtoi(cb);
    } else {
        struct CallBack *fcb = itocb(host->z.cblist);

        /* Circular list: the head's hprev is the tail.  Splice cb in
         * between the current tail and the head. */
        cb->hprev = fcb->hprev;
        cb->hnext = cbtoi(fcb);
        fcb->hprev = (itocb(fcb->hprev)->hnext = cbtoi(cb));
    }
    return 0;
}
319 | ||
/* Delete call back entry from host list */
static int
HDel(struct CallBack *cb)
{
    afs_uint32 *hhead = &h_itoh(cb->hhead)->z.cblist;

    /* If cb is the head, advance it — or clear the list if cb is the
     * only element (its hnext points back to itself). */
    if (*hhead == cbtoi(cb))
        *hhead = (*hhead == cb->hnext ? 0 : cb->hnext);
    /* Unconditional unlink: unlike TDel there are no NULL guards, so
     * cb->hprev/hnext are assumed to be valid indices here (circular
     * host-list invariant). */
    itocb(cb->hprev)->hnext = cb->hnext;
    itocb(cb->hnext)->hprev = cb->hprev;
    return 0;
}
332 | ||
/* Delete call back entry from fid's chain of cb's */
/* N.B. This one also deletes the CB, and also possibly parent FE, so
 * make sure that it is not on any other list before calling this
 * routine */
static int
CDel(struct CallBack *cb, int deletefe)
{
    int cbi = cbtoi(cb);
    struct FileEntry *fe = itofe(cb->fhead);
    afs_uint32 *cbp;
    int safety;

    /* Locate the link (fe->firstcb or a predecessor's cnext) that
     * references cb on its FileEntry's singly-linked chain.  The
     * safety counter bounds the walk: more than nblks+10 hops means
     * the chain is corrupt, so dump state and panic rather than spin
     * forever. */
    for (safety = 0, cbp = &fe->firstcb; *cbp && *cbp != cbi;
         cbp = &itocb(*cbp)->cnext, safety++) {
        if (safety > cbstuff.nblks + 10) {
            ViceLogThenPanic(0, ("CDel: Internal Error -- shutting down: "
                                 "wanted %d from %d, now at %d\n",
                                 cbi, fe->firstcb, *cbp));
            DumpCallBackState_r();
            ShutDownAndCore(PANIC);
        }
    }
    /* CDelPtr unlinks and frees the CB (and the FE, if it empties and
     * deletefe is set). */
    CDelPtr(fe, cbp, deletefe);
    return 0;
}
358 | ||
/* Same as CDel, but pointer to parent pointer to CB entry is passed,
 * as well as file entry */
/* N.B. This one also deletes the CB, and also possibly parent FE, so
 * make sure that it is not on any other list before calling this
 * routine */
/* Diagnostic counters: Ccdelpt counts calls; CcdelB counts cases where
 * itocb() disagrees with direct CB[] indexing (expected to stay 0). */
static int Ccdelpt = 0, CcdelB = 0;

static int
CDelPtr(struct FileEntry *fe, afs_uint32 * cbp,
        int deletefe)
{
    struct CallBack *cb;

    /* *cbp == 0 means the entry was not on the chain; nothing to do. */
    if (!*cbp)
        return 0;
    Ccdelpt++;
    cb = itocb(*cbp);
    /* Consistency check: itocb(i) should equal &CB[i]. */
    if (cb != &CB[*cbp])
        CcdelB++;
    /* Unlink cb from the FE's chain, then return it to the free list. */
    *cbp = cb->cnext;
    FreeCB(cb);
    /* If that was the FE's last callback, optionally delete the FE. */
    if ((--fe->ncbs == 0) && deletefe)
        FDel(fe);
    return 0;
}
384 | ||
/*
 * Return a pointer to the link (fe->firstcb or some entry's cnext) that
 * references the given host's callback on fe's chain.  If the host has
 * no callback there, the returned pointer refers to the chain's
 * terminating 0 — callers must test *result before dereferencing.
 * Panics (after dumping state) if the chain is longer than the total
 * number of callback blocks, which would indicate corruption.
 */
static afs_uint32 *
FindCBPtr(struct FileEntry *fe, struct host *host)
{
    afs_uint32 hostindex = h_htoi(host);
    struct CallBack *cb;
    afs_uint32 *cbp;
    int safety;

    for (safety = 0, cbp = &fe->firstcb; *cbp; cbp = &cb->cnext, safety++) {
        if (safety > cbstuff.nblks) {
            ViceLog(0, ("FindCBPtr: Internal Error -- shutting down.\n"));
            DumpCallBackState_r();
            ShutDownAndCore(PANIC);
        }
        cb = itocb(*cbp);
        if (cb->hhead == hostindex)
            break;
    }
    return cbp;
}
405 | ||
/* Delete file entry from hash table */
static int
FDel(struct FileEntry *fe)
{
    int fei = fetoi(fe);
    afs_uint32 *p = &HashTable[FEHash(fe->volid, fe->unique)];

    /* Walk the hash chain to find the link that references fe. */
    while (*p && *p != fei)
        p = &itofe(*p)->fnext;
    /* fe must be on its hash chain; anything else is corruption. */
    opr_Assert(*p);
    /* Unlink and return the entry to the free list. */
    *p = fe->fnext;
    FreeFE(fe);
    return 0;
}
420 | ||
/* initialize the callback package */
/*
 * Allocate nblks FileEntry and nblks CallBack structures and thread
 * them all onto the free lists.  Index 0 of each array is reserved
 * (never used): the arrays are allocated with nblks entries and the
 * base pointers are decremented by one, so valid indices run
 * 1..nblks over the allocated storage.
 */
int
InitCallBack(int nblks)
{
    opr_Assert(nblks > 0);

    H_LOCK;
    tfirst = CBtime(time(NULL));
    FE = calloc(nblks, sizeof(struct FileEntry));
    if (!FE) {
        ViceLogThenPanic(0, ("Failed malloc in InitCallBack\n"));
    }
    FE--;                       /* FE[0] is supposed to point to junk */
    cbstuff.nFEs = nblks;
    /* FreeFE decrements cbstuff.nFEs, so this pushes FE[nblks]..FE[1]
     * onto the free list and terminates. */
    while (cbstuff.nFEs)
        FreeFE(&FE[cbstuff.nFEs]);      /* This is correct */
    CB = calloc(nblks, sizeof(struct CallBack));
    if (!CB) {
        ViceLogThenPanic(0, ("Failed malloc in InitCallBack\n"));
    }
    CB--;                       /* CB[0] is supposed to point to junk */
    cbstuff.nCBs = nblks;
    while (cbstuff.nCBs)
        FreeCB(&CB[cbstuff.nCBs]);      /* This is correct */
    cbstuff.nblks = nblks;
    cbstuff.nbreakers = 0;
    H_UNLOCK;
    return 0;
}
452 | ||
/*
 * Send CB_DROPPED ("callbacks lost") notifications for nfids fids to
 * one host, batching at most AFSCBMAX fids per RXAFSCB_CallBack RPC.
 *
 * Called with H_LOCK held; the lock is dropped around each RPC and
 * reacquired afterwards.  Error codes from the individual RPCs are
 * OR'd together, so the return value is 0 only on total success (a
 * nonzero result is not a single meaningful error code).
 */
afs_int32
XCallBackBulk_r(struct host * ahost, struct AFSFid * fids, afs_int32 nfids)
{
    struct AFSCallBack tcbs[AFSCBMAX];
    int i;
    struct AFSCBFids tf;
    struct AFSCBs tc;
    int code;
    int j;
    struct rx_connection *cb_conn = NULL;

    rx_SetConnDeadTime(ahost->z.callback_rxcon, 4);
    rx_SetConnHardDeadTime(ahost->z.callback_rxcon, AFS_HARDDEADTIME);

    code = 0;
    j = 0;                      /* index of the first fid of the current batch */
    while (nfids > 0) {

        /* Build one batch of at most AFSCBMAX CB_DROPPED entries. */
        for (i = 0; i < nfids && i < AFSCBMAX; i++) {
            tcbs[i].CallBackVersion = CALLBACK_VERSION;
            tcbs[i].ExpirationTime = 0;
            tcbs[i].CallBackType = CB_DROPPED;
        }
        tf.AFSCBFids_len = i;
        tf.AFSCBFids_val = &(fids[j]);
        nfids -= i;
        j += i;
        tc.AFSCBs_len = i;
        tc.AFSCBs_val = tcbs;

        /* Hold a reference on the conn across the unlocked RPC so it
         * cannot be freed out from under us. */
        cb_conn = ahost->z.callback_rxcon;
        rx_GetConnection(cb_conn);
        H_UNLOCK;
        code |= RXAFSCB_CallBack(cb_conn, &tf, &tc);
        rx_PutConnection(cb_conn);
        cb_conn = NULL;
        H_LOCK;
    }

    return code;
}
494 | ||
/* the locked flag tells us if the host entry has already been locked
 * by our parent. I don't think anybody actually calls us with the
 * host locked, but here's how to make that work: GetSomeSpace has to
 * change so that it doesn't attempt to lock any hosts < "host". That
 * means that it might be unable to free any objects, so it has to
 * return an exit status. If it fails, then AddCallBack1 might fail,
 * as well. If so, the host->ResetDone should probably be set to 0,
 * and we probably don't want to return a callback promise to the
 * cache manager, either. */
/*
 * Public wrapper for AddCallBack1_r: takes H_LOCK, locks the host if
 * the caller has not already done so, and skips hosts flagged
 * HOSTDELETED.  Returns AddCallBack1_r's result (the workstation
 * expiration time), or 0 if no callback was added.
 */
int
AddCallBack1(struct host *host, AFSFid * fid, afs_uint32 * thead, int type,
             int locked)
{
    int retVal = 0;
    H_LOCK;
    if (!locked) {
        h_Lock_r(host);
    }
    if (!(host->z.hostFlags & HOSTDELETED))
        retVal = AddCallBack1_r(host, fid, thead, type, 1);

    if (!locked) {
        h_Unlock_r(host);
    }
    H_UNLOCK;
    return retVal;
}
522 | ||
/*
 * Worker for AddCallBack1: record a callback promise of the given type
 * for (host, fid).
 *
 * Pre-allocates a CallBack and a FileEntry from the free lists (calling
 * GetSomeSpace_r as needed) BEFORE locking the host, because the
 * allocator may need to lock other hosts.  Computes a timeout based on
 * the callback type and current sharing level, then either refreshes
 * the host's existing callback on the fid or links a new one onto the
 * fid, host, and timeout lists.  Unused pre-allocated objects are
 * returned to the free lists at the end.
 *
 * Returns the expiration time as seen by the workstation (server time
 * minus ServerBias) for the known types, else 0.  Caller holds H_LOCK;
 * the host is locked here unless "locked" says the caller did it.
 */
static int
AddCallBack1_r(struct host *host, AFSFid * fid, afs_uint32 * thead, int type,
               int locked)
{
    struct FileEntry *fe;
    struct CallBack *cb = 0, *lastcb = 0;
    struct FileEntry *newfe = 0;
    afs_uint32 time_out = 0;
    afs_uint32 *Thead = thead;
    struct CallBack *newcb = 0;
    int safety;

    cbstuff.AddCallBacks++;

    /* NOTE(review): Console bit 2 appears to mark "allocation in
     * progress" for this host; cleared again below — confirm intent. */
    host->z.Console |= 2;

    /* allocate these guys first, since we can't call the allocator with
     * the host structure locked -- or we might deadlock. However, we have
     * to avoid races with FindFE... */
    while (!(newcb = GetCB())) {
        GetSomeSpace_r(host, locked);
    }
    while (!(newfe = GetFE())) {        /* Get it now, so we don't have to call */
        /* GetSomeSpace with the host locked, later.  This might turn out to */
        /* have been unneccessary, but that's actually kind of unlikely, since */
        /* most files are not shared. */
        GetSomeSpace_r(host, locked);
    }

    if (!locked) {
        h_Lock_r(host);         /* this can yield, so do it before we get any */
        /* fragile info */
        if (host->z.hostFlags & HOSTDELETED) {
            host->z.Console &= ~2;
            h_Unlock_r(host);
            return 0;
        }
    }

    fe = FindFE(fid);
    if (type == CB_NORMAL) {
        /* Timeout shortens as more hosts share the file (TimeOut table). */
        time_out =
            TimeCeiling(time(NULL) + TimeOut(fe ? fe->ncbs : 0) +
                        ServerBias);
        Thead = THead(CBtime(time_out));
    } else if (type == CB_VOLUME) {
        /* Volume callbacks get a fixed two-hour timeout. */
        time_out = TimeCeiling((60 * 120 + time(NULL)) + ServerBias);
        Thead = THead(CBtime(time_out));
    } else if (type == CB_BULK) {
        /* bulk status can get so many callbacks all at once, and most of them
         * are probably not for things that will be used for long.
         */
        time_out =
            TimeCeiling(time(NULL) + ServerBias +
                        TimeOut(22 + (fe ? fe->ncbs : 0)));
        Thead = THead(CBtime(time_out));
    }

    host->z.Console &= ~2;

    if (!fe) {
        afs_uint32 hash;

        /* No FileEntry for this fid yet: consume the pre-allocated one
         * and insert it at the head of its hash chain. */
        fe = newfe;
        newfe = NULL;
        fe->firstcb = 0;
        fe->volid = fid->Volume;
        fe->vnode = fid->Vnode;
        fe->unique = fid->Unique;
        fe->ncbs = 0;
        fe->status = 0;
        hash = FEHash(fid->Volume, fid->Unique);
        fe->fnext = HashTable[hash];
        HashTable[hash] = fetoi(fe);
    }
    /* Look for this host's existing callback on the fid's chain;
     * lastcb trails so a new entry can be appended at the tail. */
    for (safety = 0, lastcb = cb = itocb(fe->firstcb); cb;
         lastcb = cb, cb = itocb(cb->cnext), safety++) {
        if (safety > cbstuff.nblks) {
            ViceLog(0, ("AddCallBack1: Internal Error -- shutting down.\n"));
            DumpCallBackState_r();
            ShutDownAndCore(PANIC);
        }
        if (cb->hhead == h_htoi(host))
            break;
    }
    if (cb) {                   /* Already have call back: move to new timeout list */
        /* don't change delayed callbacks back to normal ones */
        if (cb->status != CB_DELAYED)
            cb->status = type;
        /* Only move if new timeout is longer */
        if (TNorm(ttoi(Thead)) > TNorm(cb->thead)) {
            TDel(cb);
            TAdd(cb, Thead);
        }
        if (newfe == NULL) {    /* we are using the new FE */
            fe->firstcb = cbtoi(cb);
            fe->ncbs++;
            cb->fhead = fetoi(fe);
        }
    } else {
        /* New callback: consume the pre-allocated CB and link it onto
         * the fid chain (at the tail), the host list, and the timeout
         * list. */
        cb = newcb;
        newcb = NULL;
        *(lastcb ? &lastcb->cnext : &fe->firstcb) = cbtoi(cb);
        fe->ncbs++;
        cb->cnext = 0;
        cb->fhead = fetoi(fe);
        cb->status = type;
        cb->flags = 0;
        HAdd(cb, host);
        TAdd(cb, Thead);
    }

    /* now free any still-unused callback or host entries */
    if (newcb)
        FreeCB(newcb);
    if (newfe)
        FreeFE(newfe);

    if (!locked)                /* freecb and freefe might(?) yield */
        h_Unlock_r(host);

    if (type == CB_NORMAL || type == CB_VOLUME || type == CB_BULK)
        return time_out - ServerBias;   /* Expires sooner at workstation */

    return 0;
}
649 | ||
650 | static int | |
651 | CompareCBA(const void *e1, const void *e2) | |
652 | { | |
653 | const struct cbstruct *cba1 = (const struct cbstruct *)e1; | |
654 | const struct cbstruct *cba2 = (const struct cbstruct *)e2; | |
655 | return ((cba1->hp)->index - (cba2->hp)->index); | |
656 | } | |
657 | ||
/* Take an array full of hosts, all held. Break callbacks to them, and
 * release the holds once you're done.
 * Currently only works for a single Fid in afidp array.
 * If you want to make this work with multiple fids, you need to fix
 * the error handling. One approach would be to force a reset if a
 * multi-fid call fails, or you could add delayed callbacks for each
 * fid. You probably also need to sort and remove duplicate hosts.
 * When this is called from the BreakVolumeCallBacks path, it does NOT
 * force a reset if the RPC fails, it just marks the host down and tries
 * to create a delayed callback. */
/* N.B. be sure that code works when ncbas == 0 */
/* N.B. requires all the cba[*].hp pointers to be valid... */
/* This routine does not hold a lock on the host for the duration of
 * the BreakCallBack RPC, which is a significant deviation from tradition.
 * It _does_ get a lock on the host before setting VenusDown = 1,
 * which is sufficient only if VenusDown = 0 only happens when the
 * lock is held over the RPC and the subsequent VenusDown == 0
 * wherever that is done. */
static void
MultiBreakCallBack_r(struct cbstruct cba[], int ncbas,
                     struct AFSCBFids *afidp)
{
    int i, j;
    struct rx_connection *conns[MAX_CB_HOSTS];
    static struct AFSCBs tc = { 0, 0 };
    /* Maps multi_Rx slot -> cba[] index, since deleted hosts are
     * skipped when building conns[]. */
    int multi_to_cba_map[MAX_CB_HOSTS];

    opr_Assert(ncbas <= MAX_CB_HOSTS);

    /*
     * When we issue a multi_Rx callback break, we must rx_NewCall a call for
     * each host before we do anything. If there are no call channels
     * available on the conn, we must wait for one of the existing calls to
     * finish. If another thread is breaking callbacks at the same time, it is
     * possible for us to be waiting on NewCall for one of their multi_Rx
     * CallBack calls to finish, but they are waiting on NewCall for one of
     * our calls to finish. So we deadlock.
     *
     * This can be thought of as similar to obtaining multiple locks at the
     * same time. So if we establish an ordering, the possibility of deadlock
     * goes away. Here we provide such an ordering, by sorting our CBAs
     * according to CompareCBA.
     */
    qsort(cba, ncbas, sizeof(struct cbstruct), CompareCBA);

    /* set up conns for multi-call */
    for (i = 0, j = 0; i < ncbas; i++) {
        struct host *thishost = cba[i].hp;
        if (!thishost || (thishost->z.hostFlags & HOSTDELETED)) {
            continue;
        }
        rx_GetConnection(thishost->z.callback_rxcon);
        multi_to_cba_map[j] = i;
        conns[j++] = thishost->z.callback_rxcon;

        rx_SetConnDeadTime(thishost->z.callback_rxcon, 4);
        rx_SetConnHardDeadTime(thishost->z.callback_rxcon, AFS_HARDDEADTIME);
    }

    if (j) {                    /* who knows what multi would do with 0 conns? */
        cbstuff.nbreakers++;
        /* H_LOCK is dropped for the duration of the multi-RPC. */
        H_UNLOCK;
        multi_Rx(conns, j) {
            multi_RXAFSCB_CallBack(afidp, &tc);
            if (multi_error) {
                afs_uint32 idx;
                struct host *hp;
                char hoststr[16];

                i = multi_to_cba_map[multi_i];
                hp = cba[i].hp;
                idx = cba[i].thead;

                if (!hp || !idx) {
                    ViceLog(0,
                            ("BCB: INTERNAL ERROR: hp=%p, cba=%p, thead=%u\n",
                             hp, cba, idx));
                } else {
                    /*
                     ** try breaking callbacks on alternate interface addresses
                     */
                    if (MultiBreakCallBackAlternateAddress(hp, afidp)) {
                        if (ShowProblems) {
                            ViceLog(7,
                                    ("BCB: Failed on file %u.%u.%u, "
                                     "Host %p (%s:%d) is down\n",
                                     afidp->AFSCBFids_val->Volume,
                                     afidp->AFSCBFids_val->Vnode,
                                     afidp->AFSCBFids_val->Unique,
                                     hp,
                                     afs_inet_ntoa_r(hp->z.host, hoststr),
                                     ntohs(hp->z.port)));
                        }

                        /* Host unreachable: mark it down and queue a
                         * delayed callback on its original timeout list. */
                        H_LOCK;
                        h_Lock_r(hp);
                        if (!(hp->z.hostFlags & HOSTDELETED)) {
                            hp->z.hostFlags |= VENUSDOWN;
                            /**
                             * We always go into AddCallBack1_r with the host locked
                             */
                            AddCallBack1_r(hp, afidp->AFSCBFids_val, itot(idx),
                                           CB_DELAYED, 1);
                        }
                        h_Unlock_r(hp);
                        H_UNLOCK;
                    }
                }
            }
        }
        multi_End;
        H_LOCK;
        cbstuff.nbreakers--;
    }

    /* Drop the holds the caller placed on every host in cba[]. */
    for (i = 0; i < ncbas; i++) {
        struct host *hp;
        hp = cba[i].hp;
        if (hp) {
            h_Release_r(hp);
        }
    }

    /* H_UNLOCK around this so h_FreeConnection does not deadlock.
       h_FreeConnection should *never* be called on a callback connection,
       but on 10/27/04 a deadlock occurred where it was, when we know why,
       this should be reverted. -- shadow */
    H_UNLOCK;
    for (i = 0; i < j; i++) {
        rx_PutConnection(conns[i]);
    }
    H_LOCK;

    return;
}
793 | ||
/*
 * Break all call backs for fid, except for the specified host (unless flag
 * is true, in which case all get a callback message. Assumption: the specified
 * host is h_Held, by the caller; the others aren't.
 * Specified host may be bogus, that's ok. This used to check to see if the
 * host was down in two places, once right after the host was h_held, and
 * again after it was locked. That race condition is incredibly rare and
 * relatively harmless even when it does occur, so we don't check for it now.
 */
/* if flag is true, send a break callback msg to "host", too */
int
BreakCallBack(struct host *xhost, AFSFid * fid, int flag)
{
    struct FileEntry *fe;
    struct CallBack *cb, *nextcb;
    struct cbstruct cba[MAX_CB_HOSTS];
    int ncbas;
    struct AFSCBFids tf;
    int hostindex;
    char hoststr[16];

    if (xhost)
        ViceLog(7,
                ("BCB: BreakCallBack(Host %p all but %s:%d, (%u,%u,%u))\n",
                 xhost, afs_inet_ntoa_r(xhost->z.host, hoststr), ntohs(xhost->z.port),
                 fid->Volume, fid->Vnode, fid->Unique));
    else
        ViceLog(7,
                ("BCB: BreakCallBack(No Host, (%u,%u,%u))\n",
                 fid->Volume, fid->Vnode, fid->Unique));

    H_LOCK;
    cbstuff.BreakCallBacks++;
    fe = FindFE(fid);
    if (!fe) {
        goto done;
    }
    hostindex = xhost ? h_htoi(xhost) : 0;
    cb = itocb(fe->firstcb);
    if (!cb || ((fe->ncbs == 1) && (cb->hhead == hostindex) && !flag)) {
        /* the most common case is what follows the || */
        goto done;
    }
    tf.AFSCBFids_len = 1;
    tf.AFSCBFids_val = fid;

    /* Set CBFLAG_BREAKING flag on all CBs we're looking at. We do this so we
     * can loop through all relevant CBs while dropping H_LOCK, and not lose
     * track of which CBs we want to look at. If we look at all CBs over and
     * over again, we can loop indefinitely as new CBs are added. */
    for (; cb; cb = nextcb) {
        nextcb = itocb(cb->cnext);

        if ((cb->hhead != hostindex || flag)
            && (cb->status == CB_BULK || cb->status == CB_NORMAL
                || cb->status == CB_VOLUME)) {
            cb->flags |= CBFLAG_BREAKING;
        }
    }

    cb = itocb(fe->firstcb);
    opr_Assert(cb);

    /* loop through all CBs, only looking at ones with the CBFLAG_BREAKING
     * flag set */
    for (; cb;) {
        /* Gather up to MAX_CB_HOSTS hosts per round for the multi-RPC. */
        for (ncbas = 0; cb && ncbas < MAX_CB_HOSTS; cb = nextcb) {
            nextcb = itocb(cb->cnext);
            if ((cb->flags & CBFLAG_BREAKING)) {
                struct host *thishost = h_itoh(cb->hhead);
                cb->flags &= ~CBFLAG_BREAKING;
                if (!thishost) {
                    ViceLog(0, ("BCB: BOGUS! cb->hhead is NULL!\n"));
                } else if (thishost->z.hostFlags & VENUSDOWN) {
                    /* Host is down: convert to a delayed callback instead
                     * of issuing the break now. */
                    ViceLog(7,
                            ("BCB: %p (%s:%d) is down; delaying break call back\n",
                             thishost, afs_inet_ntoa_r(thishost->z.host, hoststr),
                             ntohs(thishost->z.port)));
                    cb->status = CB_DELAYED;
                } else {
                    if (!(thishost->z.hostFlags & HOSTDELETED)) {
                        /* Hold the host; MultiBreakCallBack_r releases it. */
                        h_Hold_r(thishost);
                        cba[ncbas].hp = thishost;
                        cba[ncbas].thead = cb->thead;
                        ncbas++;
                    }
                    TDel(cb);
                    HDel(cb);
                    CDel(cb, 1);        /* Usually first; so this delete
                                         * is reasonably inexpensive */
                }
            }
        }

        if (ncbas) {
            MultiBreakCallBack_r(cba, ncbas, &tf);

            /* we need to do all these initializations again because
             * MultiBreakCallBack may block */
            fe = FindFE(fid);
            if (!fe) {
                goto done;
            }
            cb = itocb(fe->firstcb);
            if (!cb || ((fe->ncbs == 1) && (cb->hhead == hostindex) && !flag)) {
                /* the most common case is what follows the || */
                goto done;
            }
        }
    }

  done:
    H_UNLOCK;
    return 0;
}
908 | ||
909 | /* Delete (do not break) single call back for fid */ | |
910 | int | |
911 | DeleteCallBack(struct host *host, AFSFid * fid) | |
912 | { | |
913 | struct FileEntry *fe; | |
914 | afs_uint32 *pcb; | |
915 | char hoststr[16]; | |
916 | ||
917 | H_LOCK; | |
918 | cbstuff.DeleteCallBacks++; | |
919 | ||
920 | h_Lock_r(host); | |
921 | /* do not care if the host has been HOSTDELETED */ | |
922 | fe = FindFE(fid); | |
923 | if (!fe) { | |
924 | h_Unlock_r(host); | |
925 | H_UNLOCK; | |
926 | ViceLog(8, | |
927 | ("DCB: No call backs for fid (%u, %u, %u)\n", fid->Volume, | |
928 | fid->Vnode, fid->Unique)); | |
929 | return 0; | |
930 | } | |
931 | pcb = FindCBPtr(fe, host); | |
932 | if (!*pcb) { | |
933 | ViceLog(8, | |
934 | ("DCB: No call back for host %p (%s:%d), (%u, %u, %u)\n", | |
935 | host, afs_inet_ntoa_r(host->z.host, hoststr), ntohs(host->z.port), | |
936 | fid->Volume, fid->Vnode, fid->Unique)); | |
937 | h_Unlock_r(host); | |
938 | H_UNLOCK; | |
939 | return 0; | |
940 | } | |
941 | HDel(itocb(*pcb)); | |
942 | TDel(itocb(*pcb)); | |
943 | CDelPtr(fe, pcb, 1); | |
944 | h_Unlock_r(host); | |
945 | H_UNLOCK; | |
946 | return 0; | |
947 | } | |
948 | ||
949 | /* | |
950 | * Delete (do not break) all call backs for fid. This call doesn't | |
951 | * set all of the various host locks, but it shouldn't really matter | |
952 | * since we're not adding callbacks, but deleting them. I'm not sure | |
953 | * why it doesn't set the lock, however; perhaps it should. | |
954 | */ | |
955 | int | |
956 | DeleteFileCallBacks(AFSFid * fid) | |
957 | { | |
958 | struct FileEntry *fe; | |
959 | struct CallBack *cb; | |
960 | afs_uint32 cbi; | |
961 | int n; | |
962 | ||
963 | H_LOCK; | |
964 | cbstuff.DeleteFiles++; | |
965 | fe = FindFE(fid); | |
966 | if (!fe) { | |
967 | H_UNLOCK; | |
968 | ViceLog(8, | |
969 | ("DF: No fid (%u,%u,%u) to delete\n", fid->Volume, fid->Vnode, | |
970 | fid->Unique)); | |
971 | return 0; | |
972 | } | |
973 | for (n = 0, cbi = fe->firstcb; cbi; n++) { | |
974 | cb = itocb(cbi); | |
975 | cbi = cb->cnext; | |
976 | TDel(cb); | |
977 | HDel(cb); | |
978 | FreeCB(cb); | |
979 | fe->ncbs--; | |
980 | } | |
981 | FDel(fe); | |
982 | H_UNLOCK; | |
983 | return 0; | |
984 | } | |
985 | ||
/* Delete (do not break) all call backs for host. The host should be
 * locked; H_LOCK is assumed held by the caller (per the _r convention
 * used throughout this file).  Always returns 0. */
int
DeleteAllCallBacks_r(struct host *host, int deletefe)
{
    struct CallBack *cb;
    int cbi, first;

    cbstuff.DeleteAllCallBacks++;
    cbi = first = host->z.cblist;
    if (!cbi) {
	ViceLog(8, ("DV: no call backs\n"));
	return 0;
    }
    /* host->z.cblist is a circular list threaded through hnext; walk
     * until we come back around to the starting index */
    do {
	cb = itocb(cbi);
	cbi = cb->hnext;
	TDel(cb);
	CDel(cb, deletefe);	/* deletefe controls whether an emptied
				 * FileEntry is reclaimed too */
    } while (cbi != first);
    host->z.cblist = 0;
    return 0;
}
1009 | ||
1010 | /* | |
1011 | * Break all delayed call backs for host. Returns 1 if all call backs | |
1012 | * successfully broken; 0 otherwise. Assumes host is h_Held and h_Locked. | |
1013 | * Must be called with VenusDown set for this host | |
1014 | */ | |
1015 | int | |
1016 | BreakDelayedCallBacks(struct host *host) | |
1017 | { | |
1018 | int retVal; | |
1019 | H_LOCK; | |
1020 | retVal = BreakDelayedCallBacks_r(host); | |
1021 | H_UNLOCK; | |
1022 | return retVal; | |
1023 | } | |
1024 | ||
/*
 * Worker for BreakDelayedCallBacks; caller holds H_LOCK, and the host
 * is expected to be h_Held and h_Locked with VENUSDOWN set.  Returns
 * the host's VENUSDOWN flag: 0 on success, non-zero if the host is
 * still considered down.
 */
int
BreakDelayedCallBacks_r(struct host *host)
{
    struct AFSFid fids[AFSCBMAX];
    int cbi, first, nfids;
    struct CallBack *cb;
    int code;
    char hoststr[16];
    struct rx_connection *cb_conn;

    cbstuff.nbreakers++;
    if (!(host->z.hostFlags & RESETDONE) && !(host->z.hostFlags & HOSTDELETED)) {
	/* The client has not had its callback state reset since it went
	 * down: ask it to drop all callback state at once rather than
	 * breaking individual delayed callbacks. */
	host->z.hostFlags &= ~ALTADDR;	/* alternate addresses are invalid */
	cb_conn = host->z.callback_rxcon;
	rx_GetConnection(cb_conn);
	/* H_LOCK is dropped for the duration of the RPC; host state may
	 * change underneath us. */
	if (host->z.interface) {
	    H_UNLOCK;
	    code =
		RXAFSCB_InitCallBackState3(cb_conn, &FS_HostUUID);
	} else {
	    H_UNLOCK;
	    code = RXAFSCB_InitCallBackState(cb_conn);
	}
	rx_PutConnection(cb_conn);
	cb_conn = NULL;
	H_LOCK;
	host->z.hostFlags |= ALTADDR;	/* alternate addresses are valid */
	if (code) {
	    if (ShowProblems) {
		ViceLog(0,
			("CB: Call back connect back failed (in break delayed) "
			 "for Host %p (%s:%d)\n",
			 host, afs_inet_ntoa_r(host->z.host, hoststr),
			 ntohs(host->z.port)));
	    }
	    host->z.hostFlags |= VENUSDOWN;
	} else {
	    ViceLog(25,
		    ("InitCallBackState success on %p (%s:%d)\n",
		     host, afs_inet_ntoa_r(host->z.host, hoststr),
		     ntohs(host->z.port)));
	    /* reset was done successfully */
	    host->z.hostFlags |= RESETDONE;
	    host->z.hostFlags &= ~VENUSDOWN;
	}
    } else
	/* Reset already done: break the CB_DELAYED callbacks in batches
	 * of at most AFSCBMAX fids per bulk RPC. */
	while (!(host->z.hostFlags & HOSTDELETED)) {
	    nfids = 0;
	    host->z.hostFlags &= ~VENUSDOWN;	/* presume up */
	    cbi = first = host->z.cblist;
	    if (!cbi)
		break;
	    do {
		/* re-read the list head each pass: the HDel below can
		 * unlink the current head of host->z.cblist */
		first = host->z.cblist;
		cb = itocb(cbi);
		cbi = cb->hnext;
		if (cb->status == CB_DELAYED) {
		    struct FileEntry *fe = itofe(cb->fhead);
		    fids[nfids].Volume = fe->volid;
		    fids[nfids].Vnode = fe->vnode;
		    fids[nfids].Unique = fe->unique;
		    nfids++;
		    HDel(cb);
		    TDel(cb);
		    CDel(cb, 1);
		}
	    } while (cbi && cbi != first && nfids < AFSCBMAX);

	    if (nfids == 0) {
		break;
	    }

	    if (XCallBackBulk_r(host, fids, nfids)) {
		/* Failed, again: put them back, probably with old
		 * timeout values */
		int i;
		if (ShowProblems) {
		    ViceLog(0,
			    ("CB: XCallBackBulk failed, Host %p (%s:%d); "
			     "callback list follows:\n",
			     host, afs_inet_ntoa_r(host->z.host, hoststr),
			     ntohs(host->z.port)));
		}
		for (i = 0; i < nfids; i++) {
		    if (ShowProblems) {
			ViceLog(0,
				("CB: Host %p (%s:%d), file %u.%u.%u "
				 "(part of bulk callback)\n",
				 host, afs_inet_ntoa_r(host->z.host, hoststr),
				 ntohs(host->z.port), fids[i].Volume,
				 fids[i].Vnode, fids[i].Unique));
		    }
		    /* used to do this:
		     * AddCallBack1_r(host, &fids[i], itot(thead[i]), CB_DELAYED, 1);
		     * * but it turns out to cause too many tricky locking problems.
		     * * now, if break delayed fails, screw it. */
		}
		host->z.hostFlags |= VENUSDOWN;	/* Failed */
		ClearHostCallbacks_r(host, 1 /* locked */ );
		break;
	    }
	    /* a short batch means we drained the delayed list */
	    if (nfids < AFSCBMAX)
		break;
	}

    cbstuff.nbreakers--;
    /* If we succeeded it's always ok to unset HFE_LATER */
    if (!(host->z.hostFlags & VENUSDOWN))
	host->z.hostFlags &= ~HFE_LATER;
    return (host->z.hostFlags & VENUSDOWN);
}
1136 | ||
/*
 * Per-host step of a volume callback break (called with H_LOCK held,
 * e.g. via h_Enumerate from MultiBreakVolumeLaterCallBack).  Hosts
 * flagged HCBREAK are accumulated into parms->cba; each time
 * MAX_CB_HOSTS entries have been collected they are flushed with a
 * single MultiBreakCallBack_r.  A host marked VENUSDOWN instead has
 * all of its callback state deleted.  Always returns 0.
 */
static int
MultiBreakVolumeCallBack_r(struct host *host,
			   struct VCBParams *parms, int deletefe)
{
    char hoststr[16];

    if (host->z.hostFlags & HOSTDELETED)
	return 0;

    if (!(host->z.hostFlags & HCBREAK))
	return 0;		/* host is not flagged to notify */

    if (host->z.hostFlags & VENUSDOWN) {
	h_Lock_r(host);
	/* Do not care if the host is now HOSTDELETED */
	if (ShowProblems) {
	    ViceLog(0,
		    ("BVCB: volume callback for Host %p (%s:%d) failed\n",
		     host, afs_inet_ntoa_r(host->z.host, hoststr),
		     ntohs(host->z.port)));
	}
	DeleteAllCallBacks_r(host, deletefe);	/* Delete all callback state
						 * rather than attempting to
						 * selectively remember to
						 * delete the volume callbacks
						 * later */
	host->z.hostFlags &= ~(RESETDONE|HCBREAK);	/* Do InitCallBackState when host returns */
	h_Unlock_r(host);
	return 0;
    }
    opr_Assert(parms->ncbas <= MAX_CB_HOSTS);

    /* Do not call MultiBreakCallBack on the current host structure
     ** because it would prematurely release the hold on the host
     */
    if (parms->ncbas == MAX_CB_HOSTS) {
	struct AFSCBFids tf;

	tf.AFSCBFids_len = 1;
	tf.AFSCBFids_val = parms->fid;

	/* this releases all the hosts */
	MultiBreakCallBack_r(parms->cba, parms->ncbas, &tf);

	parms->ncbas = 0;
    }
    parms->cba[parms->ncbas].hp = host;
    parms->cba[(parms->ncbas)++].thead = parms->thead;
    host->z.hostFlags &= ~HCBREAK;

    /* we have more work to do on this host, so make sure we keep a reference
     * to it */
    h_Hold_r(host);

    return 0;
}
1193 | ||
1194 | static int | |
1195 | MultiBreakVolumeLaterCallBack(struct host *host, void *rock) | |
1196 | { | |
1197 | struct VCBParams *parms = (struct VCBParams *)rock; | |
1198 | int retval; | |
1199 | H_LOCK; | |
1200 | retval = MultiBreakVolumeCallBack_r(host, parms, 0); | |
1201 | H_UNLOCK; | |
1202 | return retval; | |
1203 | } | |
1204 | ||
1205 | /* | |
1206 | * Break all call backs on a single volume. Don't call this with any | |
1207 | * hosts h_held. Note that this routine clears the callbacks before | |
1208 | * actually breaking them, and that the vnode isn't locked during this | |
1209 | * operation, so that people might see temporary callback loss while | |
1210 | * this function is executing. It is just a temporary state, however, | |
1211 | * since the callback will be broken later by this same function. | |
1212 | * | |
1213 | * Now uses multi-RX for CallBack RPC in a different thread, | |
1214 | * only marking them here. | |
1215 | */ | |
1216 | extern pthread_cond_t fsync_cond; | |
1217 | ||
int
BreakVolumeCallBacksLater(VolumeId volume)
{
    int hash;
    afs_uint32 *feip;
    struct FileEntry *fe;
    struct CallBack *cb;
    struct host *host;
    int found = 0;	/* did we mark anything? */

    ViceLog(25, ("Setting later on volume %" AFS_VOLID_FMT "\n",
		 afs_printable_VolumeId_lu(volume)));
    H_LOCK;
    /* Walk every FE hash chain looking for entries on this volume; mark
     * each of their callbacks CB_DELAYED and flag the owning hosts
     * HFE_LATER so the fsync thread can do the actual breaking later
     * (see BreakLaterCallBacks). */
    for (hash = 0; hash < FEHASH_SIZE; hash++) {
	for (feip = &HashTable[hash]; (fe = itofe(*feip)) != NULL; ) {
	    if (fe->volid == volume) {
		struct CallBack *cbnext;
		for (cb = itocb(fe->firstcb); cb; cb = cbnext) {
		    host = h_itoh(cb->hhead);
		    host->z.hostFlags |= HFE_LATER;
		    cb->status = CB_DELAYED;
		    cbnext = itocb(cb->cnext);
		}
		FSYNC_LOCK;
		fe->status |= FE_LATER;
		FSYNC_UNLOCK;
		found = 1;
	    }
	    feip = &fe->fnext;
	}
    }
    H_UNLOCK;
    if (!found) {
	/* didn't find any callbacks, so return right away. */
	return 0;
    }

    /* wake the fsync thread to process the FE_LATER entries just marked */
    ViceLog(25, ("Fsync thread wakeup\n"));
    FSYNC_LOCK;
    opr_cv_broadcast(&fsync_cond);
    FSYNC_UNLOCK;
    return 0;
}
1261 | ||
/*
 * Fsync-thread worker: pick one volume previously marked by
 * BreakVolumeCallBacksLater, unchain its FE_LATER FileEntries, and
 * break their delayed callbacks via h_Enumerate.  Returns 1 when any
 * work was done (so the caller arranges another pass), 0 otherwise.
 */
int
BreakLaterCallBacks(void)
{
    struct AFSFid fid;
    int hash;
    afs_uint32 *feip;
    struct CallBack *cb;
    struct FileEntry *fe = NULL;
    struct FileEntry *myfe = NULL;	/* private list of unchained FEs */
    struct host *host;
    struct VCBParams henumParms;
    unsigned short tthead = 0;	/* zero is illegal value */
    char hoststr[16];

    /* Unchain first */
    ViceLog(25, ("Looking for FileEntries to unchain\n"));
    H_LOCK;
    FSYNC_LOCK;
    /* Pick the first volume we see to clean up */
    fid.Volume = fid.Vnode = fid.Unique = 0;

    /* Phase 1: unlink every FE_LATER FE for that one volume from the
     * hash table onto the private 'myfe' list, threaded through the
     * overlaid struct object's 'next' field. */
    for (hash = 0; hash < FEHASH_SIZE; hash++) {
	for (feip = &HashTable[hash]; (fe = itofe(*feip)) != NULL; ) {
	    if (fe && (fe->status & FE_LATER)
		&& (fid.Volume == 0 || fid.Volume == fe->volid)) {
		/* Ugly, but used to avoid left side casting */
		struct object *tmpfe;
		ViceLog(125,
			("Unchaining for %u:%u:%" AFS_VOLID_FMT "\n", fe->vnode,
			 fe->unique, afs_printable_VolumeId_lu(fe->volid)));
		fid.Volume = fe->volid;
		*feip = fe->fnext;
		fe->status &= ~FE_LATER; /* not strictly needed */
		/* Works since volid is deeper than the largest pointer */
		tmpfe = (struct object *)fe;
		tmpfe->next = (struct object *)myfe;
		myfe = fe;
	    } else
		feip = &fe->fnext;
	}
    }
    FSYNC_UNLOCK;

    if (!myfe) {
	H_UNLOCK;
	return 0;
    }

    /* loop over FEs from myfe and free/break */
    tthead = 0;
    for (fe = myfe; fe;) {
	struct CallBack *cbnext;
	for (cb = itocb(fe->firstcb); cb; cb = cbnext) {
	    cbnext = itocb(cb->cnext);
	    host = h_itoh(cb->hhead);
	    if (cb->status == CB_DELAYED) {
		if (!(host->z.hostFlags & HOSTDELETED)) {
		    /* mark this host for notification */
		    host->z.hostFlags |= HCBREAK;
		    /* remember the largest (TNorm-normalized) timeout
		     * head seen, used for the whole batch below */
		    if (!tthead || (TNorm(tthead) < TNorm(cb->thead))) {
			tthead = cb->thead;
		    }
		}
		TDel(cb);
		HDel(cb);
		CDel(cb, 0);	/* Don't let CDel clean up the fe */
		/* leave flag for MultiBreakVolumeCallBack to clear */
	    } else {
		ViceLog(125,
			("Found host %p (%s:%d) non-DELAYED cb for %u:%u:%" AFS_VOLID_FMT "\n",
			 host, afs_inet_ntoa_r(host->z.host, hoststr),
			 ntohs(host->z.port), fe->vnode, fe->unique,
			 afs_printable_VolumeId_lu(fe->volid)));
	    }
	}
	myfe = fe;
	fe = (struct FileEntry *)((struct object *)fe)->next;
	FreeFE(myfe);
    }

    /* Phase 2: notify every host flagged HCBREAK above */
    if (tthead) {
	ViceLog(125, ("Breaking volume %u\n", fid.Volume));
	henumParms.ncbas = 0;
	henumParms.fid = &fid;
	henumParms.thead = tthead;
	/* H_LOCK is dropped across the enumeration; each host is
	 * handled by MultiBreakVolumeLaterCallBack */
	H_UNLOCK;
	h_Enumerate(MultiBreakVolumeLaterCallBack, (char *)&henumParms);
	H_LOCK;
	if (henumParms.ncbas) {	/* do left-overs */
	    struct AFSCBFids tf;
	    tf.AFSCBFids_len = 1;
	    tf.AFSCBFids_val = &fid;

	    MultiBreakCallBack_r(henumParms.cba, henumParms.ncbas, &tf);
	    henumParms.ncbas = 0;
	}
    }
    H_UNLOCK;

    /* Arrange to be called again */
    return 1;
}
1364 | ||
1365 | /* | |
1366 | * Delete all timed-out call back entries (to be called periodically by file | |
1367 | * server) | |
1368 | */ | |
1369 | int | |
1370 | CleanupTimedOutCallBacks(void) | |
1371 | { | |
1372 | H_LOCK; | |
1373 | CleanupTimedOutCallBacks_r(); | |
1374 | H_UNLOCK; | |
1375 | return 0; | |
1376 | } | |
1377 | ||
/*
 * Worker for CleanupTimedOutCallBacks; caller holds H_LOCK.  Advances
 * the global 'tfirst' through every expired slot of the timeout wheel,
 * deleting each callback chained off the slot.  Returns non-zero if
 * anything was deleted.
 */
int
CleanupTimedOutCallBacks_r(void)
{
    afs_uint32 now = CBtime(time(NULL));	/* current time in callback ticks */
    afs_uint32 *thead;
    struct CallBack *cb;
    int ntimedout = 0;
    char hoststr[16];

    while (tfirst <= now) {
	int cbi;
	cbi = *(thead = THead(tfirst));
	if (cbi) {
	    /* the slot's list is circular via tnext; walk until we come
	     * back around to the head */
	    do {
		cb = itocb(cbi);
		cbi = cb->tnext;
		ViceLog(8,
			("CCB: deleting timed out call back %x (%s:%d), (%" AFS_VOLID_FMT ",%u,%u)\n",
			 h_itoh(cb->hhead)->z.host,
			 afs_inet_ntoa_r(h_itoh(cb->hhead)->z.host, hoststr),
			 h_itoh(cb->hhead)->z.port,
			 afs_printable_VolumeId_lu(itofe(cb->fhead)->volid),
			 itofe(cb->fhead)->vnode, itofe(cb->fhead)->unique));
		HDel(cb);
		CDel(cb, 1);
		ntimedout++;
		/* deleting more CBs than there are blocks in the package
		 * means the lists are corrupt: dump state and panic */
		if (ntimedout > cbstuff.nblks) {
		    ViceLog(0, ("CCB: Internal Error -- shutting down...\n"));
		    DumpCallBackState_r();
		    ShutDownAndCore(PANIC);
		}
	    } while (cbi != *thead);
	    *thead = 0;
	}
	tfirst++;
    }
    cbstuff.CBsTimedOut += ntimedout;
    ViceLog(7, ("CCB: deleted %d timed out callbacks\n", ntimedout));
    return (ntimedout > 0);
}
1418 | ||
1419 | /** | |
1420 | * parameters to pass to lih*_r from h_Enumerate_r when trying to find a host | |
1421 | * from which to clear callbacks. | |
1422 | */ | |
1423 | struct lih_params { | |
1424 | /** | |
1425 | * Points to the least interesting host found; try to clear callbacks on | |
1426 | * this host after h_Enumerate_r(lih*_r)'ing. | |
1427 | */ | |
1428 | struct host *lih; | |
1429 | ||
1430 | /** | |
1431 | * The last host we got from lih*_r, but we couldn't clear its callbacks | |
1432 | * for some reason. Choose the next-best host after this one (with the | |
1433 | * current lih*_r, this means to only select hosts that have an ActiveCall | |
1434 | * newer than lastlih). | |
1435 | */ | |
1436 | struct host *lastlih; | |
1437 | }; | |
1438 | ||
1439 | /* Value of host->z.refCount that allows us to reliably infer that | |
1440 | * host may be held by some other thread */ | |
1441 | #define OTHER_MUSTHOLD_LIH 2 | |
1442 | ||
1443 | /* This version does not allow 'host' to be selected unless its ActiveCall | |
1444 | * is newer than 'params->lastlih' which is the host with the oldest | |
1445 | * ActiveCall from the last pass (if it is provided). We filter out any hosts | |
 * that are held by other threads.
1447 | * | |
1448 | * There is a small problem here, but it may not be easily fixable. Say we | |
1449 | * select some host A, and give it back to GetSomeSpace_r. GSS_r for some | |
1450 | * reason cannot clear the callbacks on A, and so calls us again with | |
1451 | * lastlih = A. Suppose there is another host B that has the same ActiveCall | |
1452 | * time as A. We will now skip over host B, since | |
1453 | * 'hostB->z.ActiveCall > hostA->z.ActiveCall' is not true. This could result in | |
1454 | * us prematurely going to the GSS_r 2nd or 3rd pass, and making us a little | |
1455 | * inefficient. This should be pretty rare, though, except perhaps in cases | |
1456 | * with very small numbers of hosts. | |
1457 | * | |
1458 | * Also filter out any hosts with HOSTDELETED set. h_Enumerate_r should in | |
1459 | * theory not give these to us anyway, but be paranoid. | |
1460 | */ | |
1461 | static int | |
1462 | lih0_r(struct host *host, void *rock) | |
1463 | { | |
1464 | struct lih_params *params = (struct lih_params *)rock; | |
1465 | ||
1466 | /* OTHER_MUSTHOLD_LIH is because the h_Enum loop holds us once */ | |
1467 | if (host->z.cblist | |
1468 | && (!(host->z.hostFlags & HOSTDELETED)) | |
1469 | && (host->z.refCount < OTHER_MUSTHOLD_LIH) | |
1470 | && (!params->lih || host->z.ActiveCall < params->lih->z.ActiveCall) | |
1471 | && (!params->lastlih || host->z.ActiveCall > params->lastlih->z.ActiveCall)) { | |
1472 | ||
1473 | if (params->lih) { | |
1474 | h_Release_r(params->lih); /* release prev host */ | |
1475 | } | |
1476 | ||
1477 | h_Hold_r(host); | |
1478 | params->lih = host; | |
1479 | } | |
1480 | return 0; | |
1481 | } | |
1482 | ||
1483 | /* same as lih0_r, except we do not prevent held hosts from being selected. */ | |
1484 | static int | |
1485 | lih1_r(struct host *host, void *rock) | |
1486 | { | |
1487 | struct lih_params *params = (struct lih_params *)rock; | |
1488 | ||
1489 | if (host->z.cblist | |
1490 | && (!(host->z.hostFlags & HOSTDELETED)) | |
1491 | && (!params->lih || host->z.ActiveCall < params->lih->z.ActiveCall) | |
1492 | && (!params->lastlih || host->z.ActiveCall > params->lastlih->z.ActiveCall)) { | |
1493 | ||
1494 | if (params->lih) { | |
1495 | h_Release_r(params->lih); /* release prev host */ | |
1496 | } | |
1497 | ||
1498 | h_Hold_r(host); | |
1499 | params->lih = host; | |
1500 | } | |
1501 | return 0; | |
1502 | } | |
1503 | ||
1504 | /* This could be upgraded to get more space each time */ | |
1505 | /* first pass: sequentially find the oldest host which isn't held by | |
1506 | anyone for which we can clear callbacks; | |
1507 | skipping 'hostp' */ | |
1508 | /* second pass: sequentially find the oldest host regardless of | |
1509 | whether or not the host is held; skipping 'hostp' */ | |
1510 | /* third pass: attempt to clear callbacks from 'hostp' */ | |
1511 | /* always called with hostp unlocked */ | |
1512 | ||
1513 | /* Note: hostlist is ordered most recently created host first and | |
1514 | * its order has no relationship to the most recently used. */ | |
1515 | extern struct host *hostList; | |
/*
 * Reclaim callback space when the package has run out of blocks.
 * Strategy: (1) scavenge timed-out callbacks; (2) in up to two passes
 * over the host list, pick the host with the oldest ActiveCall and
 * clear its callbacks (pass 0 skips hosts held by other threads, pass
 * 1 does not); (3) as a last resort clear hostp's own callback state.
 * Called with H_LOCK held and hostp unlocked unless 'locked' is set.
 * Always returns 0.
 */
static int
GetSomeSpace_r(struct host *hostp, int locked)
{
    struct host *hp;
    struct lih_params params;
    int i = 0;

    if (cbstuff.GotSomeSpaces == 0) {
	/* only log this once; if GSS is getting called constantly, that's not
	 * good but don't make things worse by spamming the log. */
	ViceLog(0, ("We have run out of callback space; forcing callback revocation. "
		    "This suggests the fileserver is configured with insufficient "
		    "callbacks; you probably want to increase the -cb fileserver "
		    "parameter (current setting: %u). The fileserver will continue "
		    "to operate, but this may indicate a severe performance problem\n",
		    cbstuff.nblks));
	ViceLog(0, ("This message is logged at most once; for more information "
		    "see the OpenAFS documentation and fileserver xstat collection 3\n"));
    }

    cbstuff.GotSomeSpaces++;
    ViceLog(5,
	    ("GSS: First looking for timed out call backs via CleanupCallBacks\n"));
    if (CleanupTimedOutCallBacks_r()) {
	cbstuff.GSS3++;
	return 0;
    }

    i = 0;
    params.lastlih = NULL;

    do {
	params.lih = NULL;

	h_Enumerate_r(i == 0 ? lih0_r : lih1_r, hostList, &params);

	hp = params.lih;
	if (params.lastlih) {
	    h_Release_r(params.lastlih);
	    params.lastlih = NULL;
	}

	if (hp) {
	    /* note that 'hp' was held by lih*_r; we will need to release it */
	    cbstuff.GSS4++;
	    if ((hp != hostp) && !ClearHostCallbacks_r(hp, 0 /* not locked or held */ )) {
		h_Release_r(hp);
		return 0;
	    }

	    /* ClearHostCallbacks_r failed; remember this host so the
	     * next enumeration only considers newer candidates */
	    params.lastlih = hp;
	    /* params.lastlih will be released on the next iteration, after
	     * h_Enumerate_r */

	} else {
	    /*
	     * Next time try getting callbacks from any host even if
	     * it's held, since the only other option is starvation for
	     * the file server (i.e. until the callback timeout arrives).
	     */
	    i++;
	    params.lastlih = NULL;
	    cbstuff.GSS1++;
	    ViceLog(5,
		    ("GSS: Try harder for longest inactive host cnt= %d\n",
		     i));
	}
    } while (i < 2);

    /* Could not obtain space from other hosts, clear hostp's callback state */
    cbstuff.GSS2++;
    if (!locked) {
	h_Lock_r(hostp);
    }
    ClearHostCallbacks_r(hostp, 1 /*already locked */ );
    if (!locked) {
	h_Unlock_r(hostp);
    }
    return 0;
}
1596 | ||
/* locked - set if caller has already locked the host */
/*
 * Delete all of hp's callback state and, if the host is up, tell the
 * client via InitCallBackState* that its state is gone.  Called with
 * H_LOCK held.  Returns non-zero if the host could not be processed
 * (already HOSTDELETED, or its lock is contended); 0 otherwise.
 */
static int
ClearHostCallbacks_r(struct host *hp, int locked)
{
    int code;
    char hoststr[16];
    struct rx_connection *cb_conn = NULL;

    ViceLog(5,
	    ("GSS: Delete longest inactive host %p (%s:%d)\n",
	     hp, afs_inet_ntoa_r(hp->z.host, hoststr), ntohs(hp->z.port)));

    if ((hp->z.hostFlags & HOSTDELETED)) {
	/* hp could go away after reacquiring H_LOCK in h_NBLock_r, so we can't
	 * really use it; its callbacks will get cleared anyway when
	 * h_TossStuff_r gets its hands on it */
	return 1;
    }

    h_Hold_r(hp);

    /** Try a non-blocking lock. If the lock is already held return
     * after releasing hold on hp
     */
    if (!locked) {
	if (h_NBLock_r(hp)) {
	    h_Release_r(hp);
	    return 1;
	}
    }
    if (hp->z.Console & 2) {
	/*
	 * If the special console field is set it means that a thread
	 * is waiting in AddCallBack1 after it set pointers to the
	 * file entry and/or callback entry. Because of the bogus
	 * usage of h_hold it won't prevent from another thread, this
	 * one, to remove all the callbacks so just to be safe we keep
	 * a reference. NOTE, on the last phase we'll free the calling
	 * host's callbacks but that's ok...
	 */
	cbstuff.GSS5++;
    }
    DeleteAllCallBacks_r(hp, 1);
    if (hp->z.hostFlags & VENUSDOWN) {
	hp->z.hostFlags &= ~RESETDONE;	/* remember that we must do a reset */
    } else if (!(hp->z.hostFlags & HOSTDELETED)) {
	/* host is up, try a call */
	hp->z.hostFlags &= ~ALTADDR;	/* alternate addresses are invalid */
	cb_conn = hp->z.callback_rxcon;
	rx_GetConnection(hp->z.callback_rxcon);
	/* H_LOCK is dropped for the duration of the RPC */
	if (hp->z.interface) {
	    H_UNLOCK;
	    code =
		RXAFSCB_InitCallBackState3(cb_conn, &FS_HostUUID);
	} else {
	    H_UNLOCK;
	    code = RXAFSCB_InitCallBackState(cb_conn);
	}
	rx_PutConnection(cb_conn);
	cb_conn = NULL;
	H_LOCK;
	hp->z.hostFlags |= ALTADDR;	/* alternate addresses are valid */
	if (code) {
	    /* failed, mark host down and need reset */
	    hp->z.hostFlags |= VENUSDOWN;
	    hp->z.hostFlags &= ~RESETDONE;
	} else {
	    /* reset succeeded, we're done */
	    hp->z.hostFlags |= RESETDONE;
	}
    }
    if (!locked)
	h_Unlock_r(hp);
    h_Release_r(hp);

    return 0;
}
1674 | #endif /* INTERPRET_DUMP */ | |
1675 | ||
1676 | ||
/* Write a human-readable summary of the callback package counters
 * (cbstuff) to stderr.  Reads the counters without taking H_LOCK, so
 * the values may be slightly stale.  Always returns 0. */
int
PrintCallBackStats(void)
{
    fprintf(stderr,
	    "%d add CB, %d break CB, %d del CB, %d del FE, %d CB's timed out, %d space reclaim, %d del host\n",
	    cbstuff.AddCallBacks, cbstuff.BreakCallBacks,
	    cbstuff.DeleteCallBacks, cbstuff.DeleteFiles, cbstuff.CBsTimedOut,
	    cbstuff.GotSomeSpaces, cbstuff.DeleteAllCallBacks);
    fprintf(stderr, "%d CBs, %d FEs, (%d of total of %d 16-byte blocks)\n",
	    cbstuff.nCBs, cbstuff.nFEs, cbstuff.nCBs + cbstuff.nFEs,
	    cbstuff.nblks);
    fprintf(stderr, "%d GSS1, %d GSS2, %d GSS3, %d GSS4, %d GSS5 (internal counters)\n",
	    cbstuff.GSS1, cbstuff.GSS2, cbstuff.GSS3, cbstuff.GSS4, cbstuff.GSS5);

    return 0;
}
1693 | ||
1694 | #define MAGIC 0x12345678 /* To check byte ordering of dump when it is read in */ | |
1695 | #define MAGICV2 0x12345679 /* To check byte ordering & version of dump when it is read in */ | |
1696 | ||
1697 | ||
1698 | #ifndef INTERPRET_DUMP | |
1699 | ||
1700 | #ifdef AFS_DEMAND_ATTACH_FS | |
1701 | /* | |
1702 | * demand attach fs | |
1703 | * callback state serialization | |
1704 | */ | |
1705 | static int cb_stateSaveTimeouts(struct fs_dump_state * state); | |
1706 | static int cb_stateSaveFEHash(struct fs_dump_state * state); | |
1707 | static int cb_stateSaveFEs(struct fs_dump_state * state); | |
1708 | static int cb_stateSaveFE(struct fs_dump_state * state, struct FileEntry * fe); | |
1709 | static int cb_stateRestoreTimeouts(struct fs_dump_state * state); | |
1710 | static int cb_stateRestoreFEHash(struct fs_dump_state * state); | |
1711 | static int cb_stateRestoreFEs(struct fs_dump_state * state); | |
1712 | static int cb_stateRestoreFE(struct fs_dump_state * state); | |
1713 | static int cb_stateRestoreCBs(struct fs_dump_state * state, struct FileEntry * fe, | |
1714 | struct iovec * iov, int niovecs); | |
1715 | ||
1716 | static int cb_stateVerifyFEHash(struct fs_dump_state * state); | |
1717 | static int cb_stateVerifyFE(struct fs_dump_state * state, struct FileEntry * fe); | |
1718 | static int cb_stateVerifyFCBList(struct fs_dump_state * state, struct FileEntry * fe); | |
1719 | static int cb_stateVerifyTimeoutQueues(struct fs_dump_state * state); | |
1720 | ||
1721 | static int cb_stateFEToDiskEntry(struct FileEntry *, struct FEDiskEntry *); | |
1722 | static int cb_stateDiskEntryToFE(struct fs_dump_state * state, | |
1723 | struct FEDiskEntry *, struct FileEntry *); | |
1724 | ||
1725 | static int cb_stateCBToDiskEntry(struct CallBack *, struct CBDiskEntry *); | |
1726 | static int cb_stateDiskEntryToCB(struct fs_dump_state * state, | |
1727 | struct CBDiskEntry *, struct CallBack *); | |
1728 | ||
1729 | static int cb_stateFillHeader(struct callback_state_header * hdr); | |
1730 | static int cb_stateCheckHeader(struct callback_state_header * hdr); | |
1731 | ||
1732 | static int cb_stateAllocMap(struct fs_dump_state * state); | |
1733 | ||
/*
 * Save the callback package state to the fileserver state dump.
 *
 * Serializes, in order: a zeroed (invalid) callback header, the timeout
 * queue heads, the FE hash table buckets, and every FileEntry with its
 * CallBacks; finally rewrites the now-valid header in place.
 *
 * \param state  dump state; eof_offset marks where we start writing
 * \return 0 on success, 1 on any write failure
 */
int
cb_stateSave(struct fs_dump_state * state)
{
    int ret = 0;

    /* record where the callback section begins in the dump file */
    AssignInt64(state->eof_offset, &state->hdr->cb_offset);

    /* invalidate callback state header */
    /* a zeroed header (magic == 0) is written first so a crash mid-dump
     * leaves an obviously-invalid section; the real header goes in last */
    memset(state->cb_hdr, 0, sizeof(struct callback_state_header));
    if (fs_stateWriteHeader(state, &state->hdr->cb_offset, state->cb_hdr,
                            sizeof(struct callback_state_header))) {
        ret = 1;
        goto done;
    }

    fs_stateIncEOF(state, sizeof(struct callback_state_header));

    /* dump timeout state */
    if (cb_stateSaveTimeouts(state)) {
        ret = 1;
        goto done;
    }

    /* dump fe hashtable state */
    if (cb_stateSaveFEHash(state)) {
        ret = 1;
        goto done;
    }

    /* dump callback state */
    if (cb_stateSaveFEs(state)) {
        ret = 1;
        goto done;
    }

    /* write the callback state header to disk */
    /* cb_hdr has been accumulating counters (nFEs/nCBs/fe_max/cb_max)
     * during the passes above; stamp it and overwrite the placeholder */
    cb_stateFillHeader(state->cb_hdr);
    if (fs_stateWriteHeader(state, &state->hdr->cb_offset, state->cb_hdr,
                            sizeof(struct callback_state_header))) {
        ret = 1;
        goto done;
    }

 done:
    return ret;
}
1780 | ||
/*
 * Restore the callback package state from a fileserver state dump.
 *
 * Reads and validates the callback section header, allocates the old->new
 * index translation maps, then restores the timeout queues, FE hash table
 * and all FE/CB records in the same order cb_stateSave() wrote them.
 *
 * \return 0 on success, 1 on read/validation failure
 */
int
cb_stateRestore(struct fs_dump_state * state)
{
    int ret = 0;

    if (fs_stateReadHeader(state, &state->hdr->cb_offset, state->cb_hdr,
                           sizeof(struct callback_state_header))) {
        ret = 1;
        goto done;
    }

    /* reject wrong magic/version or counts exceeding our allocation */
    if (cb_stateCheckHeader(state->cb_hdr)) {
        ret = 1;
        goto done;
    }

    /* maps must exist before any FE/CB record is read back */
    if (cb_stateAllocMap(state)) {
        ret = 1;
        goto done;
    }

    if (cb_stateRestoreTimeouts(state)) {
        ret = 1;
        goto done;
    }

    if (cb_stateRestoreFEHash(state)) {
        ret = 1;
        goto done;
    }

    /* restore FEs and CBs from disk */
    if (cb_stateRestoreFEs(state)) {
        ret = 1;
        goto done;
    }

    /* restore the timeout queue heads */
    tfirst = state->cb_hdr->tfirst;

 done:
    return ret;
}
1824 | ||
/*
 * Rewrite every stored FE/CB/host index from its value in the old (dumped)
 * incarnation to the index it received in this incarnation.
 *
 * Must run after all FEs, CBs and hosts have been restored, because the
 * fe_map/cb_map/h_map translation tables are only complete at that point.
 * Index 0 means "no link" and is passed through unchanged by the
 * *_OldToNew() helpers.
 *
 * \return 0 on success, 1 if any stored index fails translation
 */
int
cb_stateRestoreIndices(struct fs_dump_state * state)
{
    int i, ret = 0;
    struct FileEntry * fe;
    struct CallBack * cb;

    /* restore indices in the FileEntry structures */
    /* entry 0 of each map is unused: FE/CB indexing is one-based */
    for (i = 1; i < state->fe_map.len; i++) {
        if (state->fe_map.entries[i].new_idx) {
            fe = itofe(state->fe_map.entries[i].new_idx);

            /* restore the fe->fnext entry */
            if (fe_OldToNew(state, fe->fnext, &fe->fnext)) {
                ret = 1;
                goto done;
            }

            /* restore the fe->firstcb entry */
            if (cb_OldToNew(state, fe->firstcb, &fe->firstcb)) {
                ret = 1;
                goto done;
            }
        }
    }

    /* restore indices in the CallBack structures */
    for (i = 1; i < state->cb_map.len; i++) {
        if (state->cb_map.entries[i].new_idx) {
            cb = itocb(state->cb_map.entries[i].new_idx);

            /* restore the cb->cnext entry */
            if (cb_OldToNew(state, cb->cnext, &cb->cnext)) {
                ret = 1;
                goto done;
            }

            /* restore the cb->fhead entry */
            if (fe_OldToNew(state, cb->fhead, &cb->fhead)) {
                ret = 1;
                goto done;
            }

            /* restore the cb->hhead entry */
            if (h_OldToNew(state, cb->hhead, &cb->hhead)) {
                ret = 1;
                goto done;
            }

            /* restore the cb->tprev entry */
            if (cb_OldToNew(state, cb->tprev, &cb->tprev)) {
                ret = 1;
                goto done;
            }

            /* restore the cb->tnext entry */
            if (cb_OldToNew(state, cb->tnext, &cb->tnext)) {
                ret = 1;
                goto done;
            }

            /* restore the cb->hprev entry */
            if (cb_OldToNew(state, cb->hprev, &cb->hprev)) {
                ret = 1;
                goto done;
            }

            /* restore the cb->hnext entry */
            if (cb_OldToNew(state, cb->hnext, &cb->hnext)) {
                ret = 1;
                goto done;
            }
        }
    }

    /* restore the timeout queue head indices */
    for (i = 0; i < state->cb_timeout_hdr->records; i++) {
        if (cb_OldToNew(state, timeout[i], &timeout[i])) {
            ret = 1;
            goto done;
        }
    }

    /* restore the FE hash table queue heads */
    for (i = 0; i < state->cb_fehash_hdr->records; i++) {
        if (fe_OldToNew(state, HashTable[i], &HashTable[i])) {
            ret = 1;
            goto done;
        }
    }

 done:
    return ret;
}
1919 | ||
/*
 * Run all callback-state consistency checks.
 *
 * Both checks always run, even if the first one fails, so the log
 * contains every detected inconsistency.
 *
 * \return 0 if all checks pass, 1 if any check fails
 */
int
cb_stateVerify(struct fs_dump_state * state)
{
    int failed = 0;

    failed |= (cb_stateVerifyFEHash(state) != 0);
    failed |= (cb_stateVerifyTimeoutQueues(state) != 0);

    return failed;
}
1935 | ||
/*
 * Verify every chain of the FileEntry hash table.
 *
 * For each bucket, walks the singly-linked fnext chain, checking that each
 * index is in range and that each FileEntry passes cb_stateVerifyFE().
 * A chain longer than FS_STATE_FE_MAX_HASH_CHAIN_LEN is assumed to be a
 * cycle and aborts that bucket's walk.
 *
 * \return 0 if consistent, 1 if any problem was found
 */
static int
cb_stateVerifyFEHash(struct fs_dump_state * state)
{
    int ret = 0, i;
    struct FileEntry * fe;
    afs_uint32 fei, chain_len;

    for (i = 0; i < FEHASH_SIZE; i++) {
        chain_len = 0;
        for (fei = HashTable[i], fe = itofe(fei);
             fe;
             fei = fe->fnext, fe = itofe(fei)) {
            /* range check before touching the entry's contents */
            if (fei > cbstuff.nblks) {
                ViceLog(0, ("cb_stateVerifyFEHash: error: index out of range (fei=%d)\n", fei));
                ret = 1;
                break;
            }
            /* per-entry check failure is recorded but doesn't stop the walk */
            if (cb_stateVerifyFE(state, fe)) {
                ret = 1;
            }
            if (chain_len > FS_STATE_FE_MAX_HASH_CHAIN_LEN) {
                ViceLog(0, ("cb_stateVerifyFEHash: error: hash chain %d length exceeds %d; assuming there's a loop\n",
                            i, FS_STATE_FE_MAX_HASH_CHAIN_LEN));
                ret = 1;
                break;
            }
            chain_len++;
        }
    }

    return ret;
}
1968 | ||
1969 | static int | |
1970 | cb_stateVerifyFE(struct fs_dump_state * state, struct FileEntry * fe) | |
1971 | { | |
1972 | int ret = 0; | |
1973 | ||
1974 | if ((fe->firstcb && !fe->ncbs) || | |
1975 | (!fe->firstcb && fe->ncbs)) { | |
1976 | ViceLog(0, ("cb_stateVerifyFE: error: fe->firstcb does not agree with fe->ncbs (fei=%lu, fe->firstcb=%lu, fe->ncbs=%lu)\n", | |
1977 | afs_printable_uint32_lu(fetoi(fe)), | |
1978 | afs_printable_uint32_lu(fe->firstcb), | |
1979 | afs_printable_uint32_lu(fe->ncbs))); | |
1980 | ret = 1; | |
1981 | } | |
1982 | if (cb_stateVerifyFCBList(state, fe)) { | |
1983 | ViceLog(0, ("cb_stateVerifyFE: error: FCBList failed verification (fei=%lu)\n", | |
1984 | afs_printable_uint32_lu(fetoi(fe)))); | |
1985 | ret = 1; | |
1986 | } | |
1987 | ||
1988 | return ret; | |
1989 | } | |
1990 | ||
/*
 * Verify the per-file callback list hanging off a FileEntry.
 *
 * Walks the singly-linked cnext chain starting at fe->firstcb, checking
 * index ranges, that each CallBack points back at this FE (cb->fhead),
 * and finally that the chain length matches fe->ncbs.  A chain longer
 * than FS_STATE_FCB_MAX_LIST_LEN is assumed to be a cycle.
 *
 * \return 0 if consistent, 1 otherwise
 */
static int
cb_stateVerifyFCBList(struct fs_dump_state * state, struct FileEntry * fe)
{
    int ret = 0;
    afs_uint32 cbi, fei, chain_len = 0;
    struct CallBack * cb;

    fei = fetoi(fe);

    for (cbi = fe->firstcb, cb = itocb(cbi);
         cb;
         cbi = cb->cnext, cb = itocb(cbi)) {
        if (cbi > cbstuff.nblks) {
            ViceLog(0, ("cb_stateVerifyFCBList: error: list index out of range (cbi=%d, ncbs=%d)\n",
                        cbi, cbstuff.nblks));
            ret = 1;
            goto done;
        }
        /* back-pointer mismatch is logged but the walk continues */
        if (cb->fhead != fei) {
            ViceLog(0, ("cb_stateVerifyFCBList: error: cb->fhead != fei (fei=%d, cb->fhead=%d)\n",
                        fei, cb->fhead));
            ret = 1;
        }
        if (chain_len > FS_STATE_FCB_MAX_LIST_LEN) {
            ViceLog(0, ("cb_stateVerifyFCBList: error: list length exceeds %d (fei=%d); assuming there's a loop\n",
                        FS_STATE_FCB_MAX_LIST_LEN, fei));
            ret = 1;
            goto done;
        }
        chain_len++;
    }

    /* note: skipped when we bailed out via goto above */
    if (fe->ncbs != chain_len) {
        ViceLog(0, ("cb_stateVerifyFCBList: error: list length mismatch (len=%d, fe->ncbs=%d)\n",
                    chain_len, fe->ncbs));
        ret = 1;
    }

 done:
    return ret;
}
2032 | ||
2033 | int | |
2034 | cb_stateVerifyHCBList(struct fs_dump_state * state, struct host * host) | |
2035 | { | |
2036 | int ret = 0; | |
2037 | afs_uint32 hi, chain_len, cbi; | |
2038 | struct CallBack *cb, *ncb; | |
2039 | ||
2040 | hi = h_htoi(host); | |
2041 | chain_len = 0; | |
2042 | ||
2043 | for (cbi = host->z.cblist, cb = itocb(cbi); | |
2044 | cb; | |
2045 | cbi = cb->hnext, cb = ncb) { | |
2046 | if (chain_len && (host->z.cblist == cbi)) { | |
2047 | /* we've wrapped around the circular list, and everything looks ok */ | |
2048 | break; | |
2049 | } | |
2050 | if (cb->hhead != hi) { | |
2051 | ViceLog(0, ("cb_stateVerifyHCBList: error: incorrect cb->hhead (cbi=%d, h->index=%d, cb->hhead=%d)\n", | |
2052 | cbi, hi, cb->hhead)); | |
2053 | ret = 1; | |
2054 | } | |
2055 | if (!cb->hprev || !cb->hnext) { | |
2056 | ViceLog(0, ("cb_stateVerifyHCBList: error: null index in circular list (cbi=%d, h->index=%d)\n", | |
2057 | cbi, hi)); | |
2058 | ret = 1; | |
2059 | goto done; | |
2060 | } | |
2061 | if ((cb->hprev > cbstuff.nblks) || | |
2062 | (cb->hnext > cbstuff.nblks)) { | |
2063 | ViceLog(0, ("cb_stateVerifyHCBList: error: list index out of range (cbi=%d, h->index=%d, cb->hprev=%d, cb->hnext=%d, nCBs=%d)\n", | |
2064 | cbi, hi, cb->hprev, cb->hnext, cbstuff.nblks)); | |
2065 | ret = 1; | |
2066 | goto done; | |
2067 | } | |
2068 | ncb = itocb(cb->hnext); | |
2069 | if (cbi != ncb->hprev) { | |
2070 | ViceLog(0, ("cb_stateVerifyHCBList: error: corrupt linked list (cbi=%d, h->index=%d)\n", | |
2071 | cbi, hi)); | |
2072 | ret = 1; | |
2073 | goto done; | |
2074 | } | |
2075 | if (chain_len > FS_STATE_HCB_MAX_LIST_LEN) { | |
2076 | ViceLog(0, ("cb_stateVerifyFCBList: error: list length exceeds %d (h->index=%d); assuming there's a loop\n", | |
2077 | FS_STATE_HCB_MAX_LIST_LEN, hi)); | |
2078 | ret = 1; | |
2079 | goto done; | |
2080 | } | |
2081 | chain_len++; | |
2082 | } | |
2083 | ||
2084 | done: | |
2085 | return ret; | |
2086 | } | |
2087 | ||
/*
 * Verify every callback timeout queue.
 *
 * Each of the CB_NUM_TIMEOUT_QUEUES queues is a circular doubly-linked
 * list threaded through cb->tprev/cb->tnext.  For each node we check:
 * index in range, thead points back at the right queue, both links
 * non-zero and in range, and the successor's tprev points back at us.
 * A chain longer than FS_STATE_TCB_MAX_LIST_LEN is assumed to be a
 * corrupt cycle.  Errors break out of the current queue but the
 * remaining queues are still checked.
 *
 * \return 0 if all queues are consistent, 1 otherwise
 */
static int
cb_stateVerifyTimeoutQueues(struct fs_dump_state * state)
{
    int ret = 0, i;
    afs_uint32 cbi, chain_len;
    struct CallBack *cb, *ncb;

    for (i = 0; i < CB_NUM_TIMEOUT_QUEUES; i++) {
        chain_len = 0;
        for (cbi = timeout[i], cb = itocb(cbi);
             cb;
             cbi = cb->tnext, cb = ncb) {
            if (chain_len && (cbi == timeout[i])) {
                /* we've wrapped around the circular list, and everything looks ok */
                break;
            }
            if (cbi > cbstuff.nblks) {
                ViceLog(0, ("cb_stateVerifyTimeoutQueues: error: list index out of range (cbi=%d, tindex=%d)\n",
                            cbi, i));
                ret = 1;
                break;
            }
            /* thead mismatch is logged but the walk continues */
            if (itot(cb->thead) != &timeout[i]) {
                ViceLog(0, ("cb_stateVerifyTimeoutQueues: error: cb->thead points to wrong timeout queue (tindex=%d, cbi=%d, cb->thead=%d)\n",
                            i, cbi, cb->thead));
                ret = 1;
            }
            if (!cb->tprev || !cb->tnext) {
                ViceLog(0, ("cb_stateVerifyTimeoutQueues: null index in circular list (cbi=%d, tindex=%d)\n",
                            cbi, i));
                ret = 1;
                break;
            }
            if ((cb->tprev > cbstuff.nblks) ||
                (cb->tnext > cbstuff.nblks)) {
                ViceLog(0, ("cb_stateVerifyTimeoutQueues: list index out of range (cbi=%d, tindex=%d, cb->tprev=%d, cb->tnext=%d, nCBs=%d)\n",
                            cbi, i, cb->tprev, cb->tnext, cbstuff.nblks));
                ret = 1;
                break;
            }
            ncb = itocb(cb->tnext);
            if (cbi != ncb->tprev) {
                ViceLog(0, ("cb_stateVerifyTimeoutQueues: corrupt linked list (cbi=%d, tindex=%d)\n",
                            cbi, i));
                ret = 1;
                break;
            }
            if (chain_len > FS_STATE_TCB_MAX_LIST_LEN) {
                ViceLog(0, ("cb_stateVerifyTimeoutQueues: list length exceeds %d (tindex=%d); assuming there's a loop\n",
                            FS_STATE_TCB_MAX_LIST_LEN, i));
                ret = 1;
                break;
            }
            chain_len++;
        }
    }

    return ret;
}
2147 | ||
2148 | static int | |
2149 | cb_stateSaveTimeouts(struct fs_dump_state * state) | |
2150 | { | |
2151 | int ret = 0; | |
2152 | struct iovec iov[2]; | |
2153 | ||
2154 | AssignInt64(state->eof_offset, &state->cb_hdr->timeout_offset); | |
2155 | ||
2156 | memset(state->cb_timeout_hdr, 0, sizeof(struct callback_state_fehash_header)); | |
2157 | state->cb_timeout_hdr->magic = CALLBACK_STATE_TIMEOUT_MAGIC; | |
2158 | state->cb_timeout_hdr->records = CB_NUM_TIMEOUT_QUEUES; | |
2159 | state->cb_timeout_hdr->len = sizeof(struct callback_state_timeout_header) + | |
2160 | (state->cb_timeout_hdr->records * sizeof(afs_uint32)); | |
2161 | ||
2162 | iov[0].iov_base = (char *)state->cb_timeout_hdr; | |
2163 | iov[0].iov_len = sizeof(struct callback_state_timeout_header); | |
2164 | iov[1].iov_base = (char *)timeout; | |
2165 | iov[1].iov_len = sizeof(timeout); | |
2166 | ||
2167 | if (fs_stateSeek(state, &state->cb_hdr->timeout_offset)) { | |
2168 | ret = 1; | |
2169 | goto done; | |
2170 | } | |
2171 | ||
2172 | if (fs_stateWriteV(state, iov, 2)) { | |
2173 | ret = 1; | |
2174 | goto done; | |
2175 | } | |
2176 | ||
2177 | fs_stateIncEOF(state, state->cb_timeout_hdr->len); | |
2178 | ||
2179 | done: | |
2180 | return ret; | |
2181 | } | |
2182 | ||
2183 | static int | |
2184 | cb_stateRestoreTimeouts(struct fs_dump_state * state) | |
2185 | { | |
2186 | int ret = 0, len; | |
2187 | ||
2188 | if (fs_stateReadHeader(state, &state->cb_hdr->timeout_offset, | |
2189 | state->cb_timeout_hdr, | |
2190 | sizeof(struct callback_state_timeout_header))) { | |
2191 | ret = 1; | |
2192 | goto done; | |
2193 | } | |
2194 | ||
2195 | if (state->cb_timeout_hdr->magic != CALLBACK_STATE_TIMEOUT_MAGIC) { | |
2196 | ret = 1; | |
2197 | goto done; | |
2198 | } | |
2199 | if (state->cb_timeout_hdr->records != CB_NUM_TIMEOUT_QUEUES) { | |
2200 | ret = 1; | |
2201 | goto done; | |
2202 | } | |
2203 | ||
2204 | len = state->cb_timeout_hdr->records * sizeof(afs_uint32); | |
2205 | ||
2206 | if (state->cb_timeout_hdr->len != | |
2207 | (sizeof(struct callback_state_timeout_header) + len)) { | |
2208 | ret = 1; | |
2209 | goto done; | |
2210 | } | |
2211 | ||
2212 | if (fs_stateRead(state, timeout, len)) { | |
2213 | ret = 1; | |
2214 | goto done; | |
2215 | } | |
2216 | ||
2217 | done: | |
2218 | return ret; | |
2219 | } | |
2220 | ||
/*
 * Serialize the FileEntry hash table buckets to the dump file.
 *
 * Writes a callback_state_fehash_header followed by the HashTable[] array
 * in a single gathered write.
 *
 * \return 0 on success, 1 on seek/write failure
 */
static int
cb_stateSaveFEHash(struct fs_dump_state * state)
{
    int ret = 0;
    struct iovec iov[2];

    AssignInt64(state->eof_offset, &state->cb_hdr->fehash_offset);

    memset(state->cb_fehash_hdr, 0, sizeof(struct callback_state_fehash_header));
    state->cb_fehash_hdr->magic = CALLBACK_STATE_FEHASH_MAGIC;
    state->cb_fehash_hdr->records = FEHASH_SIZE;
    state->cb_fehash_hdr->len = sizeof(struct callback_state_fehash_header) +
        (state->cb_fehash_hdr->records * sizeof(afs_uint32));

    /* header and bucket array go out in one writev */
    iov[0].iov_base = (char *)state->cb_fehash_hdr;
    iov[0].iov_len = sizeof(struct callback_state_fehash_header);
    iov[1].iov_base = (char *)HashTable;
    iov[1].iov_len = sizeof(HashTable);

    if (fs_stateSeek(state, &state->cb_hdr->fehash_offset)) {
        ret = 1;
        goto done;
    }

    if (fs_stateWriteV(state, iov, 2)) {
        ret = 1;
        goto done;
    }

    fs_stateIncEOF(state, state->cb_fehash_hdr->len);

 done:
    return ret;
}
2255 | ||
2256 | static int | |
2257 | cb_stateRestoreFEHash(struct fs_dump_state * state) | |
2258 | { | |
2259 | int ret = 0, len; | |
2260 | ||
2261 | if (fs_stateReadHeader(state, &state->cb_hdr->fehash_offset, | |
2262 | state->cb_fehash_hdr, | |
2263 | sizeof(struct callback_state_fehash_header))) { | |
2264 | ret = 1; | |
2265 | goto done; | |
2266 | } | |
2267 | ||
2268 | if (state->cb_fehash_hdr->magic != CALLBACK_STATE_FEHASH_MAGIC) { | |
2269 | ret = 1; | |
2270 | goto done; | |
2271 | } | |
2272 | if (state->cb_fehash_hdr->records != FEHASH_SIZE) { | |
2273 | ret = 1; | |
2274 | goto done; | |
2275 | } | |
2276 | ||
2277 | len = state->cb_fehash_hdr->records * sizeof(afs_uint32); | |
2278 | ||
2279 | if (state->cb_fehash_hdr->len != | |
2280 | (sizeof(struct callback_state_fehash_header) + len)) { | |
2281 | ret = 1; | |
2282 | goto done; | |
2283 | } | |
2284 | ||
2285 | if (fs_stateRead(state, HashTable, len)) { | |
2286 | ret = 1; | |
2287 | goto done; | |
2288 | } | |
2289 | ||
2290 | done: | |
2291 | return ret; | |
2292 | } | |
2293 | ||
2294 | static int | |
2295 | cb_stateSaveFEs(struct fs_dump_state * state) | |
2296 | { | |
2297 | int ret = 0; | |
2298 | int fei, hash; | |
2299 | struct FileEntry *fe; | |
2300 | ||
2301 | AssignInt64(state->eof_offset, &state->cb_hdr->fe_offset); | |
2302 | ||
2303 | for (hash = 0; hash < FEHASH_SIZE ; hash++) { | |
2304 | for (fei = HashTable[hash]; fei; fei = fe->fnext) { | |
2305 | fe = itofe(fei); | |
2306 | if (cb_stateSaveFE(state, fe)) { | |
2307 | ret = 1; | |
2308 | goto done; | |
2309 | } | |
2310 | } | |
2311 | } | |
2312 | ||
2313 | done: | |
2314 | return ret; | |
2315 | } | |
2316 | ||
2317 | static int | |
2318 | cb_stateRestoreFEs(struct fs_dump_state * state) | |
2319 | { | |
2320 | int count, nFEs, ret = 0; | |
2321 | ||
2322 | nFEs = state->cb_hdr->nFEs; | |
2323 | ||
2324 | for (count = 0; count < nFEs; count++) { | |
2325 | if (cb_stateRestoreFE(state)) { | |
2326 | ret = 1; | |
2327 | goto done; | |
2328 | } | |
2329 | } | |
2330 | ||
2331 | done: | |
2332 | return ret; | |
2333 | } | |
2334 | ||
/*
 * Serialize one FileEntry and all of its CallBacks to the dump file.
 *
 * On-disk record layout: callback_state_entry_header, FEDiskEntry, then
 * hdr.nCBs CBDiskEntries.  Writes are gathered into an iovec of at most
 * 16 elements; for small CB lists everything (header included) goes out
 * in a single deferred writev.  For longer lists the header is flushed
 * early with placeholder contents (nCBs/len/magic still zero) and
 * rewritten in place at eof_offset once the true CB count is known.
 *
 * Side effects: updates state->cb_hdr's fe_max/cb_max/nFEs/nCBs counters.
 *
 * \return 0 on success, 1 on conversion or write failure
 */
static int
cb_stateSaveFE(struct fs_dump_state * state, struct FileEntry * fe)
{
    int ret = 0, iovcnt, cbi, written = 0;
    afs_uint32 fei;
    struct callback_state_entry_header hdr;
    struct FEDiskEntry fedsk;
    struct CBDiskEntry cbdsk[16];
    struct iovec iov[16];
    struct CallBack *cb;

    /* track the largest FE index seen, for sizing the restore map */
    fei = fetoi(fe);
    if (fei > state->cb_hdr->fe_max) {
        state->cb_hdr->fe_max = fei;
    }

    memset(&hdr, 0, sizeof(struct callback_state_entry_header));

    if (cb_stateFEToDiskEntry(fe, &fedsk)) {
        ret = 1;
        goto done;
    }

    /* slots 0 and 1 carry the record header and FE; CBs fill the rest */
    iov[0].iov_base = (char *)&hdr;
    iov[0].iov_len = sizeof(hdr);
    iov[1].iov_base = (char *)&fedsk;
    iov[1].iov_len = sizeof(struct FEDiskEntry);
    iovcnt = 2;

    for (cbi = fe->firstcb, cb = itocb(cbi);
         cb != NULL;
         cbi = cb->cnext, cb = itocb(cbi), hdr.nCBs++) {
        /* track the largest CB index seen, for sizing the restore map */
        if (cbi > state->cb_hdr->cb_max) {
            state->cb_hdr->cb_max = cbi;
        }
        if (cb_stateCBToDiskEntry(cb, &cbdsk[iovcnt])) {
            ret = 1;
            goto done;
        }
        cbdsk[iovcnt].index = cbi;
        iov[iovcnt].iov_base = (char *)&cbdsk[iovcnt];
        iov[iovcnt].iov_len = sizeof(struct CBDiskEntry);
        iovcnt++;
        /* flush when the iovec is full or we just queued the last CB;
         * note the first flush carries hdr with placeholder contents */
        if ((iovcnt == 16) || (!cb->cnext)) {
            if (fs_stateWriteV(state, iov, iovcnt)) {
                ret = 1;
                goto done;
            }
            written = 1;
            iovcnt = 0;
        }
    }

    hdr.magic = CALLBACK_STATE_ENTRY_MAGIC;
    hdr.len = sizeof(hdr) + sizeof(struct FEDiskEntry) +
        (hdr.nCBs * sizeof(struct CBDiskEntry));

    if (!written) {
        /* no CBs: header + FE still pending in iov[0..1] */
        if (fs_stateWriteV(state, iov, iovcnt)) {
            ret = 1;
            goto done;
        }
    } else {
        /* rewrite the placeholder header in place with the real values */
        if (fs_stateWriteHeader(state, &state->eof_offset, &hdr, sizeof(hdr))) {
            ret = 1;
            goto done;
        }
    }

    fs_stateIncEOF(state, hdr.len);

    if (written) {
        /* header rewrite moved the file pointer; reposition to EOF */
        if (fs_stateSeek(state, &state->eof_offset)) {
            ret = 1;
            goto done;
        }
    }

    state->cb_hdr->nFEs++;
    state->cb_hdr->nCBs += hdr.nCBs;

 done:
    return ret;
}
2419 | ||
/*
 * Restore one serialized FileEntry record (header, FE, and its CBs).
 *
 * Reads the record header and FEDiskEntry in one readv, allocates a live
 * FileEntry, then reads the CBDiskEntries back in batches of up to 16,
 * handing each batch to cb_stateRestoreCBs().
 *
 * \return 0 on success, 1 on read failure, bad magic, or allocation failure
 */
static int
cb_stateRestoreFE(struct fs_dump_state * state)
{
    int ret = 0, iovcnt, nCBs;
    struct callback_state_entry_header hdr;
    struct FEDiskEntry fedsk;
    struct CBDiskEntry cbdsk[16];
    struct iovec iov[16];
    struct FileEntry * fe;

    iov[0].iov_base = (char *)&hdr;
    iov[0].iov_len = sizeof(hdr);
    iov[1].iov_base = (char *)&fedsk;
    iov[1].iov_len = sizeof(fedsk);
    iovcnt = 2;

    if (fs_stateReadV(state, iov, iovcnt)) {
        ret = 1;
        goto done;
    }

    if (hdr.magic != CALLBACK_STATE_ENTRY_MAGIC) {
        ret = 1;
        goto done;
    }

    fe = GetFE();
    if (fe == NULL) {
        ViceLog(0, ("cb_stateRestoreFE: ran out of free FileEntry structures\n"));
        ret = 1;
        goto done;
    }

    /* populates the fe_map old->new translation entry as a side effect */
    if (cb_stateDiskEntryToFE(state, &fedsk, fe)) {
        ret = 1;
        goto done;
    }

    if (hdr.nCBs) {
        /* batch the CB reads: fill the iovec until it is full (16) or the
         * final CB has been queued, then read and restore that batch */
        for (iovcnt = 0, nCBs = 0;
             nCBs < hdr.nCBs;
             nCBs++) {
            iov[iovcnt].iov_base = (char *)&cbdsk[iovcnt];
            iov[iovcnt].iov_len = sizeof(struct CBDiskEntry);
            iovcnt++;
            if ((iovcnt == 16) || (nCBs == hdr.nCBs - 1)) {
                if (fs_stateReadV(state, iov, iovcnt)) {
                    ret = 1;
                    goto done;
                }
                if (cb_stateRestoreCBs(state, fe, iov, iovcnt)) {
                    ret = 1;
                    goto done;
                }
                iovcnt = 0;
            }
        }
    }

 done:
    return ret;
}
2482 | ||
/*
 * Restore a batch of CallBack disk entries belonging to one FileEntry.
 *
 * Each iovec element holds one CBDiskEntry.  Entries whose owning host
 * was skipped during host restore (FS_STATE_IDX_SKIPPED) are silently
 * dropped; all others get a live CallBack allocated and populated.
 *
 * \param niovecs  number of filled iovec elements
 * \return 0 on success, 1 if allocation or conversion fails
 */
static int
cb_stateRestoreCBs(struct fs_dump_state * state, struct FileEntry * fe,
                   struct iovec * iov, int niovecs)
{
    int ret = 0, idx;
    struct CallBack * cb;
    struct CBDiskEntry * cbdsk;

    for (idx = 0; idx < niovecs; idx++) {
        cbdsk = (struct CBDiskEntry *) iov[idx].iov_base;

        /* drop CBs whose host wasn't restored */
        if (cbdsk->cb.hhead < state->h_map.len &&
            state->h_map.entries[cbdsk->cb.hhead].valid == FS_STATE_IDX_SKIPPED) {
            continue;
        }

        if ((cb = GetCB()) == NULL) {
            ViceLog(0, ("cb_stateRestoreCBs: ran out of free CallBack structures\n"));
            ret = 1;
            goto done;
        }
        /* populates the cb_map old->new translation entry as a side effect */
        if (cb_stateDiskEntryToCB(state, cbdsk, cb)) {
            ViceLog(0, ("cb_stateRestoreCBs: corrupt CallBack disk entry\n"));
            ret = 1;
            goto done;
        }
    }

 done:
    return ret;
}
2514 | ||
2515 | ||
2516 | static int | |
2517 | cb_stateFillHeader(struct callback_state_header * hdr) | |
2518 | { | |
2519 | hdr->stamp.magic = CALLBACK_STATE_MAGIC; | |
2520 | hdr->stamp.version = CALLBACK_STATE_VERSION; | |
2521 | hdr->tfirst = tfirst; | |
2522 | return 0; | |
2523 | } | |
2524 | ||
2525 | static int | |
2526 | cb_stateCheckHeader(struct callback_state_header * hdr) | |
2527 | { | |
2528 | int ret = 0; | |
2529 | ||
2530 | if (hdr->stamp.magic != CALLBACK_STATE_MAGIC) { | |
2531 | ret = 1; | |
2532 | } else if (hdr->stamp.version != CALLBACK_STATE_VERSION) { | |
2533 | ret = 1; | |
2534 | } else if ((hdr->nFEs > cbstuff.nblks) || (hdr->nCBs > cbstuff.nblks)) { | |
2535 | ViceLog(0, ("cb_stateCheckHeader: saved callback state larger than callback memory allocation\n")); | |
2536 | ret = 1; | |
2537 | } | |
2538 | return ret; | |
2539 | } | |
2540 | ||
2541 | /* disk entry conversion routines */ | |
2542 | static int | |
2543 | cb_stateFEToDiskEntry(struct FileEntry * in, struct FEDiskEntry * out) | |
2544 | { | |
2545 | memcpy(&out->fe, in, sizeof(struct FileEntry)); | |
2546 | out->index = fetoi(in); | |
2547 | return 0; | |
2548 | } | |
2549 | ||
2550 | static int | |
2551 | cb_stateDiskEntryToFE(struct fs_dump_state * state, | |
2552 | struct FEDiskEntry * in, struct FileEntry * out) | |
2553 | { | |
2554 | int ret = 0; | |
2555 | ||
2556 | memcpy(out, &in->fe, sizeof(struct FileEntry)); | |
2557 | ||
2558 | /* setup FE map entry */ | |
2559 | if (!in->index || (in->index >= state->fe_map.len)) { | |
2560 | ViceLog(0, ("cb_stateDiskEntryToFE: index (%d) out of range", | |
2561 | in->index)); | |
2562 | ret = 1; | |
2563 | goto done; | |
2564 | } | |
2565 | state->fe_map.entries[in->index].valid = FS_STATE_IDX_VALID; | |
2566 | state->fe_map.entries[in->index].old_idx = in->index; | |
2567 | state->fe_map.entries[in->index].new_idx = fetoi(out); | |
2568 | ||
2569 | done: | |
2570 | return ret; | |
2571 | } | |
2572 | ||
2573 | static int | |
2574 | cb_stateCBToDiskEntry(struct CallBack * in, struct CBDiskEntry * out) | |
2575 | { | |
2576 | memcpy(&out->cb, in, sizeof(struct CallBack)); | |
2577 | out->index = cbtoi(in); | |
2578 | return 0; | |
2579 | } | |
2580 | ||
2581 | static int | |
2582 | cb_stateDiskEntryToCB(struct fs_dump_state * state, | |
2583 | struct CBDiskEntry * in, struct CallBack * out) | |
2584 | { | |
2585 | int ret = 0; | |
2586 | ||
2587 | memcpy(out, &in->cb, sizeof(struct CallBack)); | |
2588 | ||
2589 | /* setup CB map entry */ | |
2590 | if (!in->index || (in->index >= state->cb_map.len)) { | |
2591 | ViceLog(0, ("cb_stateDiskEntryToCB: index (%d) out of range\n", | |
2592 | in->index)); | |
2593 | ret = 1; | |
2594 | goto done; | |
2595 | } | |
2596 | state->cb_map.entries[in->index].valid = FS_STATE_IDX_VALID; | |
2597 | state->cb_map.entries[in->index].old_idx = in->index; | |
2598 | state->cb_map.entries[in->index].new_idx = cbtoi(out); | |
2599 | ||
2600 | done: | |
2601 | return ret; | |
2602 | } | |
2603 | ||
2604 | /* index map routines */ | |
2605 | static int | |
2606 | cb_stateAllocMap(struct fs_dump_state * state) | |
2607 | { | |
2608 | state->fe_map.len = state->cb_hdr->fe_max + 1; | |
2609 | state->cb_map.len = state->cb_hdr->cb_max + 1; | |
2610 | state->fe_map.entries = (struct idx_map_entry_t *) | |
2611 | calloc(state->fe_map.len, sizeof(struct idx_map_entry_t)); | |
2612 | state->cb_map.entries = (struct idx_map_entry_t *) | |
2613 | calloc(state->cb_map.len, sizeof(struct idx_map_entry_t)); | |
2614 | return ((state->fe_map.entries != NULL) && (state->cb_map.entries != NULL)) ? 0 : 1; | |
2615 | } | |
2616 | ||
2617 | int | |
2618 | fe_OldToNew(struct fs_dump_state * state, afs_uint32 old, afs_uint32 * new) | |
2619 | { | |
2620 | int ret = 0; | |
2621 | ||
2622 | /* FEs use a one-based indexing system, so old==0 implies no mapping */ | |
2623 | if (!old) { | |
2624 | *new = 0; | |
2625 | goto done; | |
2626 | } | |
2627 | ||
2628 | if (old >= state->fe_map.len) { | |
2629 | ViceLog(0, ("fe_OldToNew: index %d is out of range\n", old)); | |
2630 | ret = 1; | |
2631 | } else if (state->fe_map.entries[old].valid != FS_STATE_IDX_VALID || | |
2632 | state->fe_map.entries[old].old_idx != old) { /* sanity check */ | |
2633 | ViceLog(0, ("fe_OldToNew: index %d points to an invalid FileEntry record\n", old)); | |
2634 | ret = 1; | |
2635 | } else { | |
2636 | *new = state->fe_map.entries[old].new_idx; | |
2637 | } | |
2638 | ||
2639 | done: | |
2640 | return ret; | |
2641 | } | |
2642 | ||
2643 | int | |
2644 | cb_OldToNew(struct fs_dump_state * state, afs_uint32 old, afs_uint32 * new) | |
2645 | { | |
2646 | int ret = 0; | |
2647 | ||
2648 | /* CBs use a one-based indexing system, so old==0 implies no mapping */ | |
2649 | if (!old) { | |
2650 | *new = 0; | |
2651 | goto done; | |
2652 | } | |
2653 | ||
2654 | if (old >= state->cb_map.len) { | |
2655 | ViceLog(0, ("cb_OldToNew: index %d is out of range\n", old)); | |
2656 | ret = 1; | |
2657 | } else if (state->cb_map.entries[old].valid != FS_STATE_IDX_VALID || | |
2658 | state->cb_map.entries[old].old_idx != old) { /* sanity check */ | |
2659 | ViceLog(0, ("cb_OldToNew: index %d points to an invalid CallBack record\n", old)); | |
2660 | ret = 1; | |
2661 | } else { | |
2662 | *new = state->cb_map.entries[old].new_idx; | |
2663 | } | |
2664 | ||
2665 | done: | |
2666 | return ret; | |
2667 | } | |
2668 | #endif /* AFS_DEMAND_ATTACH_FS */ | |
2669 | ||
/* Best-effort write for DumpCallBackState_r(): the return value of write(2)
 * is tested only to silence warn_unused_result; failures are deliberately
 * ignored so a partial debug dump never aborts the fileserver path. */
#define DumpBytes(fd,buf,req) if (write(fd, buf, req) < 0) {} /* don't care */
2671 | ||
/*
 * Dump the in-memory callback state to AFSDIR_SERVER_CBKDUMP_FILEPATH
 * for offline analysis by the INTERPRET_DUMP build of this file.
 *
 * The write order here IS the dump file format and must stay in lockstep
 * with ReadDump(): magic, timestamp, cbstuff, TimeOuts, timeout, tfirst,
 * CB free-list head, FE free-list head, HashTable, CB table, FE table.
 * Caller must hold H_LOCK.
 *
 * \return always 0 (failures are logged or ignored; this is debug output)
 */
static int
DumpCallBackState_r(void)
{
    int fd, oflag;
    afs_uint32 magic = MAGICV2, now = (afs_int32) time(NULL), freelisthead;

    oflag = O_WRONLY | O_CREAT | O_TRUNC;
#ifdef AFS_NT40_ENV
    oflag |= O_BINARY;
#endif
    fd = open(AFSDIR_SERVER_CBKDUMP_FILEPATH, oflag, 0666);
    if (fd < 0) {
        ViceLog(0,
                ("Couldn't create callback dump file %s\n",
                 AFSDIR_SERVER_CBKDUMP_FILEPATH));
        return 0;
    }
    /*
     * Collect but ignoring the return value of write(2) here,
     * to avoid compiler warnings on some platforms.
     */
    DumpBytes(fd, &magic, sizeof(magic));
    DumpBytes(fd, &now, sizeof(now));
    DumpBytes(fd, &cbstuff, sizeof(cbstuff));
    DumpBytes(fd, TimeOuts, sizeof(TimeOuts));
    DumpBytes(fd, timeout, sizeof(timeout));
    DumpBytes(fd, &tfirst, sizeof(tfirst));
    /* free-list heads are stored as table indices, not raw pointers */
    freelisthead = cbtoi((struct CallBack *)CBfree);
    DumpBytes(fd, &freelisthead, sizeof(freelisthead));     /* This is a pointer */
    freelisthead = fetoi((struct FileEntry *)FEfree);
    DumpBytes(fd, &freelisthead, sizeof(freelisthead));     /* This is a pointer */
    DumpBytes(fd, HashTable, sizeof(HashTable));
    /* tables are one-based: element [1] is the first real entry */
    DumpBytes(fd, &CB[1], sizeof(CB[1]) * cbstuff.nblks);   /* CB stuff */
    DumpBytes(fd, &FE[1], sizeof(FE[1]) * cbstuff.nblks);   /* FE stuff */
    close(fd);

    return 0;
}
2710 | ||
2711 | int | |
2712 | DumpCallBackState(void) { | |
2713 | int rc; | |
2714 | ||
2715 | H_LOCK; | |
2716 | rc = DumpCallBackState_r(); | |
2717 | H_UNLOCK; | |
2718 | ||
2719 | return(rc); | |
2720 | } | |
2721 | ||
2722 | #endif /* !INTERPRET_DUMP */ | |
2723 | ||
2724 | #ifdef INTERPRET_DUMP | |
2725 | ||
/*
 * Read exactly req bytes from fd into buf, or terminate the program.
 *
 * A read error or a short read (premature EOF) is fatal: this runs only
 * in the offline dump-analyzer, where a truncated dump is unrecoverable.
 */
static void
ReadBytes(int fd, void *buf, size_t req)
{
    ssize_t got = read(fd, buf, req);

    if (got < 0) {
        perror("read");
        exit(-1);
    }
    if ((size_t)got != req) {
        fprintf(stderr, "read: premature EOF (expected %lu, got %lu)\n",
                (unsigned long)req, (unsigned long)got);
        exit(-1);
    }
}
2741 | ||
2742 | /* This is only compiled in for the callback analyzer program */ | |
2743 | /* Returns the time of the dump */ | |
2744 | time_t | |
2745 | ReadDump(char *file, int timebits) | |
2746 | { | |
2747 | int fd, oflag; | |
2748 | afs_uint32 magic, freelisthead; | |
2749 | afs_uint32 now; | |
2750 | afs_int64 now64; | |
2751 | ||
2752 | oflag = O_RDONLY; | |
2753 | #ifdef AFS_NT40_ENV | |
2754 | oflag |= O_BINARY; | |
2755 | #endif | |
2756 | fd = open(file, oflag); | |
2757 | if (fd < 0) { | |
2758 | fprintf(stderr, "Couldn't read dump file %s\n", file); | |
2759 | exit(1); | |
2760 | } | |
2761 | ReadBytes(fd, &magic, sizeof(magic)); | |
2762 | if (magic == MAGICV2) { | |
2763 | timebits = 32; | |
2764 | } else { | |
2765 | if (magic != MAGIC) { | |
2766 | fprintf(stderr, | |
2767 | "Magic number of %s is invalid. You might be trying to\n", | |
2768 | file); | |
2769 | fprintf(stderr, | |
2770 | "run this program on a machine type with a different byte ordering.\n"); | |
2771 | exit(1); | |
2772 | } | |
2773 | } | |
2774 | if (timebits == 64) { | |
2775 | ReadBytes(fd, &now64, sizeof(afs_int64)); | |
2776 | now = (afs_int32) now64; | |
2777 | } else | |
2778 | ReadBytes(fd, &now, sizeof(afs_int32)); | |
2779 | ||
2780 | ReadBytes(fd, &cbstuff, sizeof(cbstuff)); | |
2781 | ReadBytes(fd, TimeOuts, sizeof(TimeOuts)); | |
2782 | ReadBytes(fd, timeout, sizeof(timeout)); | |
2783 | ReadBytes(fd, &tfirst, sizeof(tfirst)); | |
2784 | ReadBytes(fd, &freelisthead, sizeof(freelisthead)); | |
2785 | CB = ((struct CallBack | |
2786 | *)(calloc(cbstuff.nblks, sizeof(struct CallBack)))) - 1; | |
2787 | FE = ((struct FileEntry | |
2788 | *)(calloc(cbstuff.nblks, sizeof(struct FileEntry)))) - 1; | |
2789 | CBfree = (struct CallBack *)itocb(freelisthead); | |
2790 | ReadBytes(fd, &freelisthead, sizeof(freelisthead)); | |
2791 | FEfree = (struct FileEntry *)itofe(freelisthead); | |
2792 | ReadBytes(fd, HashTable, sizeof(HashTable)); | |
2793 | ReadBytes(fd, &CB[1], sizeof(CB[1]) * cbstuff.nblks); /* CB stuff */ | |
2794 | ReadBytes(fd, &FE[1], sizeof(FE[1]) * cbstuff.nblks); /* FE stuff */ | |
2795 | if (close(fd)) { | |
2796 | perror("Error reading dumpfile"); | |
2797 | exit(1); | |
2798 | } | |
2799 | return now; | |
2800 | } | |
2801 | ||
2802 | #ifdef AFS_NT40_ENV | |
2803 | #include "AFS_component_version_number.h" | |
2804 | #else | |
2805 | #include "AFS_component_version_number.c" | |
2806 | #endif | |
2807 | ||
2808 | static afs_uint32 *cbTrack; | |
2809 | ||
2810 | int | |
2811 | main(int argc, char **argv) | |
2812 | { | |
2813 | int err = 0, cbi = 0, stats = 0, noptions = 0, all = 0, vol = 0, raw = 0; | |
2814 | static AFSFid fid; | |
2815 | struct FileEntry *fe; | |
2816 | struct CallBack *cb; | |
2817 | time_t now; | |
2818 | int timebits = 32; | |
2819 | ||
2820 | memset(&fid, 0, sizeof(fid)); | |
2821 | argc--; | |
2822 | argv++; | |
2823 | while (argc && **argv == '-') { | |
2824 | noptions++; | |
2825 | argc--; | |
2826 | if (!strcmp(*argv, "-host")) { | |
2827 | if (argc < 1) { | |
2828 | err++; | |
2829 | break; | |
2830 | } | |
2831 | argc--; | |
2832 | cbi = atoi(*++argv); | |
2833 | } else if (!strcmp(*argv, "-fid")) { | |
2834 | if (argc < 2) { | |
2835 | err++; | |
2836 | break; | |
2837 | } | |
2838 | argc -= 3; | |
2839 | fid.Volume = atoi(*++argv); | |
2840 | fid.Vnode = atoi(*++argv); | |
2841 | fid.Unique = atoi(*++argv); | |
2842 | } else if (!strcmp(*argv, "-time")) { | |
2843 | fprintf(stderr, "-time not supported\n"); | |
2844 | exit(1); | |
2845 | } else if (!strcmp(*argv, "-stats")) { | |
2846 | stats = 1; | |
2847 | } else if (!strcmp(*argv, "-all")) { | |
2848 | all = 1; | |
2849 | } else if (!strcmp(*argv, "-raw")) { | |
2850 | raw = 1; | |
2851 | } else if (!strcmp(*argv, "-timebits")) { | |
2852 | if (argc < 1) { | |
2853 | err++; | |
2854 | break; | |
2855 | } | |
2856 | argc--; | |
2857 | timebits = atoi(*++argv); | |
2858 | if ((timebits != 32) | |
2859 | && (timebits != 64) | |
2860 | ) | |
2861 | err++; | |
2862 | } else if (!strcmp(*argv, "-volume")) { | |
2863 | if (argc < 1) { | |
2864 | err++; | |
2865 | break; | |
2866 | } | |
2867 | argc--; | |
2868 | vol = atoi(*++argv); | |
2869 | } else | |
2870 | err++; | |
2871 | argv++; | |
2872 | } | |
2873 | if (err || argc != 1) { | |
2874 | fprintf(stderr, | |
2875 | "Usage: cbd [-host cbid] [-fid volume vnode] [-stats] [-all] [-timebits 32" | |
2876 | "|64" | |
2877 | "] callbackdumpfile\n"); | |
2878 | fprintf(stderr, | |
2879 | "[cbid is shown for each host in the hosts.dump file]\n"); | |
2880 | exit(1); | |
2881 | } | |
2882 | now = ReadDump(*argv, timebits); | |
2883 | if (stats || noptions == 0) { | |
2884 | time_t uxtfirst = UXtime(tfirst), tnow = now; | |
2885 | printf("The time of the dump was %u %s", (unsigned int) now, ctime(&tnow)); | |
2886 | printf("The last time cleanup ran was %u %s", (unsigned int) uxtfirst, | |
2887 | ctime(&uxtfirst)); | |
2888 | PrintCallBackStats(); | |
2889 | } | |
2890 | ||
2891 | cbTrack = calloc(cbstuff.nblks, sizeof(cbTrack[0])); | |
2892 | ||
2893 | if (all || vol) { | |
2894 | int hash; | |
2895 | afs_uint32 *feip; | |
2896 | struct CallBack *cb; | |
2897 | struct FileEntry *fe; | |
2898 | ||
2899 | for (hash = 0; hash < FEHASH_SIZE; hash++) { | |
2900 | for (feip = &HashTable[hash]; (fe = itofe(*feip));) { | |
2901 | if (!vol || (fe->volid == vol)) { | |
2902 | afs_uint32 fe_i = fetoi(fe); | |
2903 | ||
2904 | for (cb = itocb(fe->firstcb); cb; cb = itocb(cb->cnext)) { | |
2905 | afs_uint32 cb_i = cbtoi(cb); | |
2906 | ||
2907 | if (cb_i > cbstuff.nblks) { | |
2908 | printf("CB index out of range (%u > %d), stopped for this FE\n", | |
2909 | cb_i, cbstuff.nblks); | |
2910 | break; | |
2911 | } | |
2912 | ||
2913 | if (cbTrack[cb_i]) { | |
2914 | printf("CB entry already claimed for FE[%u] (this is FE[%u]), stopped\n", | |
2915 | cbTrack[cb_i], fe_i); | |
2916 | break; | |
2917 | } | |
2918 | cbTrack[cb_i] = fe_i; | |
2919 | ||
2920 | PrintCB(cb, now); | |
2921 | } | |
2922 | *feip = fe->fnext; | |
2923 | } else { | |
2924 | feip = &fe->fnext; | |
2925 | } | |
2926 | } | |
2927 | } | |
2928 | } | |
2929 | if (cbi) { | |
2930 | afs_uint32 cfirst = cbi; | |
2931 | do { | |
2932 | cb = itocb(cbi); | |
2933 | PrintCB(cb, now); | |
2934 | cbi = cb->hnext; | |
2935 | } while (cbi != cfirst); | |
2936 | } | |
2937 | if (fid.Volume) { | |
2938 | fe = FindFE(&fid); | |
2939 | if (!fe) { | |
2940 | printf("No callback entries for %u.%u\n", fid.Volume, fid.Vnode); | |
2941 | exit(1); | |
2942 | } | |
2943 | cb = itocb(fe->firstcb); | |
2944 | while (cb) { | |
2945 | PrintCB(cb, now); | |
2946 | cb = itocb(cb->cnext); | |
2947 | } | |
2948 | } | |
2949 | if (raw) { | |
2950 | afs_int32 *p, i; | |
2951 | for (i = 1; i < cbstuff.nblks; i++) { | |
2952 | p = (afs_int32 *) & FE[i]; | |
2953 | printf("%d:%12x%12x%12x%12x\n", i, p[0], p[1], p[2], p[3]); | |
2954 | } | |
2955 | } | |
2956 | ||
2957 | free(cbTrack); | |
2958 | exit(0); | |
2959 | } | |
2960 | ||
2961 | void | |
2962 | PrintCB(struct CallBack *cb, afs_uint32 now) | |
2963 | { | |
2964 | struct FileEntry *fe = itofe(cb->fhead); | |
2965 | time_t expires = TIndexToTime(cb->thead); | |
2966 | ||
2967 | if (fe == NULL) | |
2968 | return; | |
2969 | ||
2970 | printf("vol=%" AFS_VOLID_FMT " vn=%u cbs=%d hi=%d st=%d fest=%d, exp in %lu secs at %s", | |
2971 | afs_printable_VolumeId_lu(fe->volid), fe->vnode, fe->ncbs, | |
2972 | cb->hhead, cb->status, fe->status, (unsigned long)(expires - now), | |
2973 | ctime(&expires)); | |
2974 | } | |
2975 | ||
2976 | #endif | |
2977 | ||
2978 | #if !defined(INTERPRET_DUMP) | |
2979 | /* | |
2980 | ** try breaking calbacks on afidp from host. Use multi_rx. | |
2981 | ** return 0 on success, non-zero on failure | |
2982 | */ | |
2983 | int | |
2984 | MultiBreakCallBackAlternateAddress(struct host *host, struct AFSCBFids *afidp) | |
2985 | { | |
2986 | int retVal; | |
2987 | H_LOCK; | |
2988 | retVal = MultiBreakCallBackAlternateAddress_r(host, afidp); | |
2989 | H_UNLOCK; | |
2990 | return retVal; | |
2991 | } | |
2992 | ||
/*
 * Attempt to break the callbacks in 'afidp' by contacting 'host' on each
 * of its known alternate addresses in parallel (multi_Rx).  On the first
 * address that answers, the host's callback connection and primary
 * address/port are switched to that interface.
 *
 * NOTE(review): the caller must hold H_LOCK — it is dropped around the
 * multi_Rx exchange and re-acquired afterwards (and briefly inside the
 * success path); verify against callers.
 *
 * Returns 0 on success (some alternate address answered), 1 on failure
 * (no interface list, no alternates, or no address responded).
 */
int
MultiBreakCallBackAlternateAddress_r(struct host *host,
				     struct AFSCBFids *afidp)
{
    int i, j;
    struct rx_connection **conns;
    struct rx_connection *connSuccess = 0;
    struct AddrPort *interfaces;
    static struct rx_securityClass *sc = 0;
    static struct AFSCBs tc = { 0, 0 };	/* empty callback array for the RPC */
    char hoststr[16];

    /* nothing more can be done */
    if (!host->z.interface)
	return 1;		/* failure */

    /* the only address is the primary interface */
    if (host->z.interface->numberOfInterfaces <= 1)
	return 1;		/* failure */

    /* initialise a security object only once */
    if (!sc)
	sc = rxnull_NewClientSecurityObject();

    i = host->z.interface->numberOfInterfaces;
    interfaces = calloc(i, sizeof(struct AddrPort));
    conns = calloc(i, sizeof(struct rx_connection *));
    if (!interfaces || !conns) {
	ViceLogThenPanic(0, ("Failed malloc in "
			     "MultiBreakCallBackAlternateAddress_r\n"));
    }

    /* initialize alternate rx connections; short dead times so that
     * unresponsive addresses fail fast */
    for (i = 0, j = 0; i < host->z.interface->numberOfInterfaces; i++) {
	/* this is the current primary address */
	if (host->z.host == host->z.interface->interface[i].addr &&
	    host->z.port == host->z.interface->interface[i].port)
	    continue;

	interfaces[j] = host->z.interface->interface[i];
	conns[j] =
	    rx_NewConnection(interfaces[j].addr,
			     interfaces[j].port, 1, sc, 0);
	rx_SetConnDeadTime(conns[j], 2);
	rx_SetConnHardDeadTime(conns[j], AFS_HARDDEADTIME);
	j++;
    }

    opr_Assert(j);		/* at least one alternate address */
    ViceLog(125,
	    ("Starting multibreakcall back on all addr for host %p (%s:%d)\n",
	     host, afs_inet_ntoa_r(host->z.host, hoststr), ntohs(host->z.port)));
    /* drop the host lock for the (possibly slow) network exchange */
    H_UNLOCK;
    multi_Rx(conns, j) {
	multi_RXAFSCB_CallBack(afidp, &tc);
	if (!multi_error) {
	    /* first success: adopt this address as the host's primary */
	    H_LOCK;
	    if (host->z.callback_rxcon)
		rx_DestroyConnection(host->z.callback_rxcon);
	    host->z.callback_rxcon = conns[multi_i];
	    /* add then remove */
	    addInterfaceAddr_r(host, interfaces[multi_i].addr,
			       interfaces[multi_i].port);
	    removeInterfaceAddr_r(host, host->z.host, host->z.port);
	    host->z.host = interfaces[multi_i].addr;
	    host->z.port = interfaces[multi_i].port;
	    connSuccess = conns[multi_i];
	    /* restore normal (long) dead times on the adopted connection */
	    rx_SetConnDeadTime(host->z.callback_rxcon, 50);
	    rx_SetConnHardDeadTime(host->z.callback_rxcon, AFS_HARDDEADTIME);
	    ViceLog(125,
		    ("multibreakcall success with addr %s:%d\n",
		     afs_inet_ntoa_r(interfaces[multi_i].addr, hoststr),
		     ntohs(interfaces[multi_i].port)));
	    H_UNLOCK;
	    multi_Abort;	/* stop the remaining parallel calls */
	}
    }
    multi_End_Ignore;
    H_LOCK;
    /* Destroy all connections except the one on which we succeeded */
    for (i = 0; i < j; i++)
	if (conns[i] != connSuccess)
	    rx_DestroyConnection(conns[i]);

    free(interfaces);
    free(conns);

    if (connSuccess)
	return 0;		/* success */
    else
	return 1;		/* failure */
}
3086 | ||
3087 | ||
/*
 * Probe 'host' on each of its known alternate addresses in parallel
 * (multi_Rx RXAFSCB_ProbeUuid).  On the first address that answers with a
 * matching UUID, the host's callback connection and primary address/port
 * are switched to that interface; an address that answers with a UUID
 * MISMATCH is removed from the host's interface list.
 *
 * NOTE(review): the caller must hold H_LOCK — it is dropped around the
 * multi_Rx exchange and re-acquired afterwards; verify against callers.
 *
 * Returns 0 on success, non-0 on failure (no interface list, no
 * alternates, or no address responded).
 */
int
MultiProbeAlternateAddress_r(struct host *host)
{
    int i, j;
    struct rx_connection **conns;
    struct rx_connection *connSuccess = 0;
    struct AddrPort *interfaces;
    static struct rx_securityClass *sc = 0;
    char hoststr[16];

    /* nothing more can be done */
    if (!host->z.interface)
	return 1;		/* failure */

    /* the only address is the primary interface */
    if (host->z.interface->numberOfInterfaces <= 1)
	return 1;		/* failure */

    /* initialise a security object only once */
    if (!sc)
	sc = rxnull_NewClientSecurityObject();

    i = host->z.interface->numberOfInterfaces;
    interfaces = calloc(i, sizeof(struct AddrPort));
    conns = calloc(i, sizeof(struct rx_connection *));
    if (!interfaces || !conns) {
	ViceLogThenPanic(0, ("Failed malloc in "
			     "MultiProbeAlternateAddress_r\n"));
    }

    /* initialize alternate rx connections; short dead times so that
     * unresponsive addresses fail fast */
    for (i = 0, j = 0; i < host->z.interface->numberOfInterfaces; i++) {
	/* this is the current primary address */
	if (host->z.host == host->z.interface->interface[i].addr &&
	    host->z.port == host->z.interface->interface[i].port)
	    continue;

	interfaces[j] = host->z.interface->interface[i];
	conns[j] =
	    rx_NewConnection(interfaces[j].addr,
			     interfaces[j].port, 1, sc, 0);
	rx_SetConnDeadTime(conns[j], 2);
	rx_SetConnHardDeadTime(conns[j], AFS_HARDDEADTIME);
	j++;
    }

    opr_Assert(j);		/* at least one alternate address */
    ViceLog(125,
	    ("Starting multiprobe on all addr for host %p (%s:%d)\n",
	     host, afs_inet_ntoa_r(host->z.host, hoststr),
	     ntohs(host->z.port)));
    /* drop the host lock for the (possibly slow) network exchange */
    H_UNLOCK;
    multi_Rx(conns, j) {
	multi_RXAFSCB_ProbeUuid(&host->z.interface->uuid);
	if (!multi_error) {
	    /* first success: adopt this address as the host's primary */
	    H_LOCK;
	    if (host->z.callback_rxcon)
		rx_DestroyConnection(host->z.callback_rxcon);
	    host->z.callback_rxcon = conns[multi_i];
	    /* add then remove */
	    addInterfaceAddr_r(host, interfaces[multi_i].addr,
			       interfaces[multi_i].port);
	    removeInterfaceAddr_r(host, host->z.host, host->z.port);
	    host->z.host = interfaces[multi_i].addr;
	    host->z.port = interfaces[multi_i].port;
	    connSuccess = conns[multi_i];
	    /* restore normal (long) dead times on the adopted connection */
	    rx_SetConnDeadTime(host->z.callback_rxcon, 50);
	    rx_SetConnHardDeadTime(host->z.callback_rxcon, AFS_HARDDEADTIME);
	    ViceLog(125,
		    ("multiprobe success with addr %s:%d\n",
		     afs_inet_ntoa_r(interfaces[multi_i].addr, hoststr),
		     ntohs(interfaces[multi_i].port)));
	    H_UNLOCK;
	    multi_Abort;	/* stop the remaining parallel calls */
	} else {
	    ViceLog(125,
		    ("multiprobe failure with addr %s:%d\n",
		     afs_inet_ntoa_r(interfaces[multi_i].addr, hoststr),
		     ntohs(interfaces[multi_i].port)));

	    /* This is less than desirable but its the best we can do.
	     * The AFS Cache Manager will return either 0 for a Uuid
	     * match and a 1 for a non-match. If the error is 1 we
	     * therefore know that our mapping of IP address to Uuid
	     * is wrong. We should attempt to find the correct
	     * Uuid and fix the host tables.
	     */
	    if (multi_error == 1) {
		/* remove the current alternate address from this host */
		H_LOCK;
		removeInterfaceAddr_r(host, interfaces[multi_i].addr, interfaces[multi_i].port);
		H_UNLOCK;
	    }
	}
#ifdef AFS_DEMAND_ATTACH_FS
	/* try to bail ASAP if the fileserver is shutting down */
	FS_STATE_RDLOCK;
	if (fs_state.mode == FS_MODE_SHUTDOWN) {
	    FS_STATE_UNLOCK;
	    multi_Abort;
	}
	FS_STATE_UNLOCK;
#endif
    }
    multi_End_Ignore;
    H_LOCK;
    /* Destroy all connections except the one on which we succeeded */
    for (i = 0; i < j; i++)
	if (conns[i] != connSuccess)
	    rx_DestroyConnection(conns[i]);

    free(interfaces);
    free(conns);

    if (connSuccess)
	return 0;		/* success */
    else
	return 1;		/* failure */
}
3212 | ||
3213 | #endif /* !defined(INTERPRET_DUMP) */ |